diff mbox series

Fix iconv buffer handling with IGNORE error handler (bug #18830)

Message ID mvm7eacdklm.fsf@suse.de
State New
Headers show
Series Fix iconv buffer handling with IGNORE error handler (bug #18830) | expand

Commit Message

Andreas Schwab May 27, 2019, 2:43 p.m. UTC
[BZ #18830]
	* iconv/skeleton.c (FUNCTION_NAME): Use RESET_INPUT_BUFFER only if
	no irreversible characters occurred.
	* iconv/gconv_simple.c (internal_ucs4_loop)
	(internal_ucs4_loop_unaligned, internal_ucs4_loop_single)
	(ucs4_internal_loop, ucs4_internal_loop_unaligned)
	(ucs4_internal_loop_single, internal_ucs4le_loop)
	(internal_ucs4le_loop_unaligned, internal_ucs4le_loop_single)
	(ucs4le_internal_loop, ucs4le_internal_loop_unaligned)
	(ucs4le_internal_loop_single): Add const to outend.
	* sysdeps/s390/multiarch/gconv_simple.c (internal_ucs4le_loop)
	(ucs4_internal_loop, ucs4le_internal_loop): Likewise.
	* iconv/Makefile (tests): Add tst-iconv7.
	* iconv/tst-iconv7.c: New file.
---
 iconv/Makefile                        |  2 +-
 iconv/gconv_simple.c                  | 32 ++++++++-----
 iconv/skeleton.c                      | 28 ++++++++---
 iconv/tst-iconv7.c                    | 67 +++++++++++++++++++++++++++
 sysdeps/s390/multiarch/gconv_simple.c |  6 +--
 5 files changed, 112 insertions(+), 23 deletions(-)
 create mode 100644 iconv/tst-iconv7.c

Comments

Carlos O'Donell May 27, 2019, 9:32 p.m. UTC | #1
On 5/27/19 10:43 AM, Andreas Schwab wrote:
> 	[BZ #18830]
> 	* iconv/skeleton.c (FUNCTION_NAME): Use RESET_INPUT_BUFFER only if
> 	no irreversible characters occurred.
> 	* iconv/gconv_simple.c (internal_ucs4_loop)
> 	(internal_ucs4_loop_unaligned, internal_ucs4_loop_single)
> 	(ucs4_internal_loop, ucs4_internal_loop_unaligned)
> 	(ucs4_internal_loop_single, internal_ucs4le_loop)
> 	(internal_ucs4le_loop_unaligned, internal_ucs4le_loop_single)
> 	(ucs4le_internal_loop, ucs4le_internal_loop_unaligned)
> 	(ucs4le_internal_loop_single): Add const to outend.
> 	* sysdeps/s390/multiarch/gconv_simple.c (internal_ucs4le_loop)
> 	(ucs4_internal_loop, ucs4le_internal_loop): Likewise.
> 	* iconv/Makefile (tests): Add tst-iconv7.
> 	* iconv/tst-iconv7.c: New file.

The code changes look correct.

Please review my suggestions and corrections and post a v2.

In summary:
- Test case needs more comments.
- iconv/skeleton.c should have a comment in RESET_INPUT_BUFFER
  talking about it not being called for irreversible conversions.
- Please review my analysis and conclude if we agree on the solution.

> ---
>  iconv/Makefile                        |  2 +-
>  iconv/gconv_simple.c                  | 32 ++++++++-----
>  iconv/skeleton.c                      | 28 ++++++++---
>  iconv/tst-iconv7.c                    | 67 +++++++++++++++++++++++++++
>  sysdeps/s390/multiarch/gconv_simple.c |  6 +--
>  5 files changed, 112 insertions(+), 23 deletions(-)
>  create mode 100644 iconv/tst-iconv7.c
> 
> diff --git a/iconv/Makefile b/iconv/Makefile
> index f6631e861d..74cd9bf860 100644
> --- a/iconv/Makefile
> +++ b/iconv/Makefile
> @@ -44,7 +44,7 @@ CFLAGS-linereader.c += -DNO_TRANSLITERATION
>  CFLAGS-simple-hash.c += -I../locale
>  
>  tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
> -	  tst-iconv-mt
> +	  tst-iconv7 tst-iconv-mt

OK. Thanks for adding a test case.

>  
>  others		= iconv_prog iconvconfig
>  install-others-programs	= $(inst_bindir)/iconv
> diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
> index 35aaa8aacd..75ce8fb1f4 100644
> --- a/iconv/gconv_simple.c
> +++ b/iconv/gconv_simple.c
> @@ -76,7 +76,7 @@ __attribute ((always_inline))
>  internal_ucs4_loop (struct __gconv_step *step,
>  		    struct __gconv_step_data *step_data,
>  		    const unsigned char **inptrp, const unsigned char *inend,
> -		    unsigned char **outptrp, unsigned char *outend,
> +		    unsigned char **outptrp, const unsigned char *outend,
>  		    size_t *irreversible)
>  {
>    const unsigned char *inptr = *inptrp;
> @@ -120,7 +120,8 @@ internal_ucs4_loop_unaligned (struct __gconv_step *step,
>  			      struct __gconv_step_data *step_data,
>  			      const unsigned char **inptrp,
>  			      const unsigned char *inend,
> -			      unsigned char **outptrp, unsigned char *outend,
> +			      unsigned char **outptrp,
> +			      const unsigned char *outend,
>  			      size_t *irreversible)
>  {
>    const unsigned char *inptr = *inptrp;
> @@ -169,7 +170,8 @@ internal_ucs4_loop_single (struct __gconv_step *step,
>  			   struct __gconv_step_data *step_data,
>  			   const unsigned char **inptrp,
>  			   const unsigned char *inend,
> -			   unsigned char **outptrp, unsigned char *outend,
> +			   unsigned char **outptrp,
> +			   const unsigned char *outend,
>  			   size_t *irreversible)
>  {
>    mbstate_t *state = step_data->__statep;
> @@ -231,7 +233,7 @@ __attribute ((always_inline))
>  ucs4_internal_loop (struct __gconv_step *step,
>  		    struct __gconv_step_data *step_data,
>  		    const unsigned char **inptrp, const unsigned char *inend,
> -		    unsigned char **outptrp, unsigned char *outend,
> +		    unsigned char **outptrp, const unsigned char *outend,
>  		    size_t *irreversible)
>  {
>    int flags = step_data->__flags;
> @@ -298,7 +300,8 @@ ucs4_internal_loop_unaligned (struct __gconv_step *step,
>  			      struct __gconv_step_data *step_data,
>  			      const unsigned char **inptrp,
>  			      const unsigned char *inend,
> -			      unsigned char **outptrp, unsigned char *outend,
> +			      unsigned char **outptrp,
> +			      const unsigned char *outend,
>  			      size_t *irreversible)
>  {
>    int flags = step_data->__flags;
> @@ -368,7 +371,8 @@ ucs4_internal_loop_single (struct __gconv_step *step,
>  			   struct __gconv_step_data *step_data,
>  			   const unsigned char **inptrp,
>  			   const unsigned char *inend,
> -			   unsigned char **outptrp, unsigned char *outend,
> +			   unsigned char **outptrp,
> +			   const unsigned char *outend,
>  			   size_t *irreversible)
>  {
>    mbstate_t *state = step_data->__statep;
> @@ -443,7 +447,7 @@ __attribute ((always_inline))
>  internal_ucs4le_loop (struct __gconv_step *step,
>  		      struct __gconv_step_data *step_data,
>  		      const unsigned char **inptrp, const unsigned char *inend,
> -		      unsigned char **outptrp, unsigned char *outend,
> +		      unsigned char **outptrp, const unsigned char *outend,
>  		      size_t *irreversible)
>  {
>    const unsigned char *inptr = *inptrp;
> @@ -488,7 +492,8 @@ internal_ucs4le_loop_unaligned (struct __gconv_step *step,
>  				struct __gconv_step_data *step_data,
>  				const unsigned char **inptrp,
>  				const unsigned char *inend,
> -				unsigned char **outptrp, unsigned char *outend,
> +				unsigned char **outptrp,
> +				const unsigned char *outend,
>  				size_t *irreversible)
>  {
>    const unsigned char *inptr = *inptrp;
> @@ -540,7 +545,8 @@ internal_ucs4le_loop_single (struct __gconv_step *step,
>  			     struct __gconv_step_data *step_data,
>  			     const unsigned char **inptrp,
>  			     const unsigned char *inend,
> -			     unsigned char **outptrp, unsigned char *outend,
> +			     unsigned char **outptrp,
> +			     const unsigned char *outend,
>  			     size_t *irreversible)
>  {
>    mbstate_t *state = step_data->__statep;
> @@ -601,7 +607,7 @@ __attribute ((always_inline))
>  ucs4le_internal_loop (struct __gconv_step *step,
>  		      struct __gconv_step_data *step_data,
>  		      const unsigned char **inptrp, const unsigned char *inend,
> -		      unsigned char **outptrp, unsigned char *outend,
> +		      unsigned char **outptrp, const unsigned char *outend,
>  		      size_t *irreversible)
>  {
>    int flags = step_data->__flags;
> @@ -671,7 +677,8 @@ ucs4le_internal_loop_unaligned (struct __gconv_step *step,
>  				struct __gconv_step_data *step_data,
>  				const unsigned char **inptrp,
>  				const unsigned char *inend,
> -				unsigned char **outptrp, unsigned char *outend,
> +				unsigned char **outptrp,
> +				const unsigned char *outend,
>  				size_t *irreversible)
>  {
>    int flags = step_data->__flags;
> @@ -745,7 +752,8 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
>  			     struct __gconv_step_data *step_data,
>  			     const unsigned char **inptrp,
>  			     const unsigned char *inend,
> -			     unsigned char **outptrp, unsigned char *outend,
> +			     unsigned char **outptrp,
> +			     const unsigned char *outend,
>  			     size_t *irreversible)

Why are we changing this in this patch?

>  {
>    mbstate_t *state = step_data->__statep;
> diff --git a/iconv/skeleton.c b/iconv/skeleton.c
> index cc39fdcc70..d43fac7683 100644
> --- a/iconv/skeleton.c
> +++ b/iconv/skeleton.c
> @@ -597,6 +597,10 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>  	  inptr = *inptrp;
>  	  /* The outbuf buffer is empty.  */
>  	  outstart = outbuf;
> +#ifdef RESET_INPUT_BUFFER
> +	  size_t loop_irreversible
> +	    = lirreversible + (irreversible ? *irreversible : 0);
> +#endif

lirreversible is counting the number of characters actually converted in an irreversible
way for the local calls to a conversion.

So this change adds another variable 'loop_irreversible' which adds together the total
global count of irreversible conversions and the current local count of irreversible
conversions.

>  
>  #ifdef SAVE_RESET_STATE
>  	  SAVE_RESET_STATE (1);
> @@ -671,8 +675,16 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>  		  if (__glibc_unlikely (outerr != outbuf))
>  		    {
>  #ifdef RESET_INPUT_BUFFER
> -		      RESET_INPUT_BUFFER;
> -#else
> +		      if (loop_irreversible
> +			  == lirreversible + (irreversible ? *irreversible : 0))

Then we look to see if we had any irreversible conversions.

> +			{
> +			  /* RESET_INPUT_BUFFER can only work if there
> +			     were no irreversible characters during the
> +			     last loop.  */
> +			  RESET_INPUT_BUFFER;
> +			  goto done_reset;

If we did then we can reset the input buffer, otherwise we can't.

> +			}
> +#endif

This enables a lot of code which was previously only for no definition of 
RESET_INPUT_BUFFER. Why do you make this change?

I agree that it seems like defining RESET_INPUT_BUFFER should not prevent
us from attempting to undo the the conversions. Was this your conclusion also?

I agree that using RESET_INPUT_BUFFER should be the first thing we attempt
since it should allow us reset the input without the least cost, and that we
should not attempt to use it if irreversible conversions were handled.

I think that this change actually needs a change to the REST_INPUT_BUFFER
comment description, something like "The macro RESET_INPUT_BUFFER will not
be called if irreversible conversions were processed (possible due to 
the use of IGNORE)."

>  		      /* We have a problem in one of the functions below.
>  			 Undo the conversion upto the error point.  */
>  		      size_t nstatus __attribute__ ((unused));
> @@ -682,9 +694,9 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>  		      outbuf = outstart;
>  
>  		      /* Restore the state.  */
> -# ifdef SAVE_RESET_STATE
> +#ifdef SAVE_RESET_STATE
>  		      SAVE_RESET_STATE (0);

OK.

> -# endif
> +#endif

OK.

>  
>  		      if (__glibc_likely (!unaligned))
>  			{
> @@ -701,7 +713,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>  					       lirreversiblep
>  					       EXTRA_LOOP_ARGS);
>  			}
> -# if POSSIBLY_UNALIGNED
> +#if POSSIBLY_UNALIGNED

OK.

>  		      else
>  			{
>  			  if (FROM_DIRECTION)
> @@ -720,7 +732,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>  							       lirreversiblep
>  							       EXTRA_LOOP_ARGS);
>  			}
> -# endif
> +#endif

OK.

>  
>  		      /* We must run out of output buffer space in this
>  			 rerun.  */
> @@ -731,9 +743,11 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>  			 the invocation counter.  */
>  		      if (__glibc_unlikely (outbuf == outstart))
>  			--data->__invocation_counter;
> -#endif	/* reset input buffer */
>  		    }
>  
> +#ifdef RESET_INPUT_BUFFER
> +		done_reset:
> +#endif
>  		  /* Change the status.  */
>  		  status = result;
>  		}
> diff --git a/iconv/tst-iconv7.c b/iconv/tst-iconv7.c
> new file mode 100644
> index 0000000000..ee793d2f3e
> --- /dev/null
> +++ b/iconv/tst-iconv7.c
> @@ -0,0 +1,67 @@
> +/* Test iconv buffer handling with the IGNORE error handler.

OK.

> +   Copyright (C) 2017 Free Software Foundation, Inc.

Update year to 2019 please.

> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* Derived from BZ #18830 */

OK.

> +#include <errno.h>
> +#include <iconv.h>
> +#include <stdio.h>
> +
> +
> +static int
> +do_test (void)
> +{
> +  iconv_t cd = iconv_open ("ASCII//IGNORE", "ASCII");

OK.

> +  if (cd == (iconv_t) -1)
> +    {
> +      puts ("iconv_open failed");
> +      return 1;
> +    }
> +


> +  char input[5 + 3] = { 0, 0, 0, 0, 0, '1', '\200', '2' };

Why this sequence? What is special about it. Please add a comment.

> +  char *inptr = input;
> +  size_t insize = sizeof (input);
> +  char output[5];

Needs a clear comment why we have an output buffer smaller than the input buffer.

> +  char *outptr = output;
> +  size_t outsize = sizeof (output);
> +
> +  size_t ret = iconv (cd, &inptr, &insize, &outptr, &outsize);
> +  if (ret != (size_t) -1)
> +    {
> +      puts ("iconv succeeded");
> +      return 1;
> +    }

Needs a comment explaining why it should fail.

> +  if (errno != E2BIG)
> +    {
> +      puts ("iconv did not set errno to E2BIG");
> +      return 1;
> +    }
> +  if (inptr != input + sizeof (output) - outsize)

Why do we want this check?

> +    {
> +      printf ("iconv consumed %td characters\n", inptr - input);
> +      return 1;
> +    }
> +
> +  if (iconv_close (cd) == -1)
> +    {
> +      puts ("iconv_close failed");
> +      return 1;
> +    }
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/s390/multiarch/gconv_simple.c b/sysdeps/s390/multiarch/gconv_simple.c
> index ce7bcf541e..2861b6dacb 100644
> --- a/sysdeps/s390/multiarch/gconv_simple.c
> +++ b/sysdeps/s390/multiarch/gconv_simple.c
> @@ -404,7 +404,7 @@ ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
>  				      const unsigned char **inptrp,
>  				      const unsigned char *inend,
>  				      unsigned char **outptrp,
> -				      unsigned char *outend,
> +				      const unsigned char *outend,
>  				      size_t *irreversible)
>  {
>    const unsigned char *inptr = *inptrp;
> @@ -504,7 +504,7 @@ ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
>  				    const unsigned char **inptrp,
>  				    const unsigned char *inend,
>  				    unsigned char **outptrp,
> -				    unsigned char *outend,
> +				    const unsigned char *outend,
>  				    size_t *irreversible)
>  {
>    int flags = step_data->__flags;
> @@ -631,7 +631,7 @@ ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
>  				      const unsigned char **inptrp,
>  				      const unsigned char *inend,
>  				      unsigned char **outptrp,
> -				      unsigned char *outend,
> +				      const unsigned char *outend,
>  				      size_t *irreversible)
>  {
>    int flags = step_data->__flags;
> 

Thanks. Looking forward to v2.
Andreas Schwab May 28, 2019, 2:16 p.m. UTC | #2
On Mai 27 2019, Carlos O'Donell <carlos@redhat.com> wrote:

>> @@ -745,7 +752,8 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
>>  			     struct __gconv_step_data *step_data,
>>  			     const unsigned char **inptrp,
>>  			     const unsigned char *inend,
>> -			     unsigned char **outptrp, unsigned char *outend,
>> +			     unsigned char **outptrp,
>> +			     const unsigned char *outend,
>>  			     size_t *irreversible)
>
> Why are we changing this in this patch?

They don't conform to the interface defined by iconv/loop.c, last
changed by commit 17427edd1f.  It wasn't a problem before, because noone
has passed a const pointer.

> This enables a lot of code which was previously only for no definition of 
> RESET_INPUT_BUFFER. Why do you make this change?

Because without being able to use RESET_INPUT_BUFFER we need to use the
fallback, as if RESET_INPUT_BUFFER was undefined.

Note that none of our conversion modules actually define
RESET_INPUT_BUFFER, so only the default definition in skeleton.c is ever
used, if at all.

Andreas.

	[BZ #18830]
	* iconv/skeleton.c (FUNCTION_NAME): Use RESET_INPUT_BUFFER only if
	no irreversible characters occurred.
	* iconv/gconv_simple.c (internal_ucs4_loop)
	(internal_ucs4_loop_unaligned, internal_ucs4_loop_single)
	(ucs4_internal_loop, ucs4_internal_loop_unaligned)
	(ucs4_internal_loop_single, internal_ucs4le_loop)
	(internal_ucs4le_loop_unaligned, internal_ucs4le_loop_single)
	(ucs4le_internal_loop, ucs4le_internal_loop_unaligned)
	(ucs4le_internal_loop_single): Add const to outend.
	* sysdeps/s390/multiarch/gconv_simple.c (internal_ucs4le_loop)
	(ucs4_internal_loop, ucs4le_internal_loop): Likewise.
	* iconv/Makefile (tests): Add tst-iconv7.
	* iconv/tst-iconv7.c: New file.
---
 iconv/Makefile                        |  2 +-
 iconv/gconv_simple.c                  | 32 ++++++++++------
 iconv/skeleton.c                      | 35 +++++++++++++----
 iconv/tst-iconv7.c                    | 55 +++++++++++++++++++++++++++
 sysdeps/s390/multiarch/gconv_simple.c |  6 +--
 5 files changed, 107 insertions(+), 23 deletions(-)
 create mode 100644 iconv/tst-iconv7.c

diff --git a/iconv/Makefile b/iconv/Makefile
index f6631e861d..74cd9bf860 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -44,7 +44,7 @@ CFLAGS-linereader.c += -DNO_TRANSLITERATION
 CFLAGS-simple-hash.c += -I../locale
 
 tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
-	  tst-iconv-mt
+	  tst-iconv7 tst-iconv-mt
 
 others		= iconv_prog iconvconfig
 install-others-programs	= $(inst_bindir)/iconv
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 35aaa8aacd..75ce8fb1f4 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -76,7 +76,7 @@ __attribute ((always_inline))
 internal_ucs4_loop (struct __gconv_step *step,
 		    struct __gconv_step_data *step_data,
 		    const unsigned char **inptrp, const unsigned char *inend,
-		    unsigned char **outptrp, unsigned char *outend,
+		    unsigned char **outptrp, const unsigned char *outend,
 		    size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -120,7 +120,8 @@ internal_ucs4_loop_unaligned (struct __gconv_step *step,
 			      struct __gconv_step_data *step_data,
 			      const unsigned char **inptrp,
 			      const unsigned char *inend,
-			      unsigned char **outptrp, unsigned char *outend,
+			      unsigned char **outptrp,
+			      const unsigned char *outend,
 			      size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -169,7 +170,8 @@ internal_ucs4_loop_single (struct __gconv_step *step,
 			   struct __gconv_step_data *step_data,
 			   const unsigned char **inptrp,
 			   const unsigned char *inend,
-			   unsigned char **outptrp, unsigned char *outend,
+			   unsigned char **outptrp,
+			   const unsigned char *outend,
 			   size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
@@ -231,7 +233,7 @@ __attribute ((always_inline))
 ucs4_internal_loop (struct __gconv_step *step,
 		    struct __gconv_step_data *step_data,
 		    const unsigned char **inptrp, const unsigned char *inend,
-		    unsigned char **outptrp, unsigned char *outend,
+		    unsigned char **outptrp, const unsigned char *outend,
 		    size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -298,7 +300,8 @@ ucs4_internal_loop_unaligned (struct __gconv_step *step,
 			      struct __gconv_step_data *step_data,
 			      const unsigned char **inptrp,
 			      const unsigned char *inend,
-			      unsigned char **outptrp, unsigned char *outend,
+			      unsigned char **outptrp,
+			      const unsigned char *outend,
 			      size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -368,7 +371,8 @@ ucs4_internal_loop_single (struct __gconv_step *step,
 			   struct __gconv_step_data *step_data,
 			   const unsigned char **inptrp,
 			   const unsigned char *inend,
-			   unsigned char **outptrp, unsigned char *outend,
+			   unsigned char **outptrp,
+			   const unsigned char *outend,
 			   size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
@@ -443,7 +447,7 @@ __attribute ((always_inline))
 internal_ucs4le_loop (struct __gconv_step *step,
 		      struct __gconv_step_data *step_data,
 		      const unsigned char **inptrp, const unsigned char *inend,
-		      unsigned char **outptrp, unsigned char *outend,
+		      unsigned char **outptrp, const unsigned char *outend,
 		      size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -488,7 +492,8 @@ internal_ucs4le_loop_unaligned (struct __gconv_step *step,
 				struct __gconv_step_data *step_data,
 				const unsigned char **inptrp,
 				const unsigned char *inend,
-				unsigned char **outptrp, unsigned char *outend,
+				unsigned char **outptrp,
+				const unsigned char *outend,
 				size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -540,7 +545,8 @@ internal_ucs4le_loop_single (struct __gconv_step *step,
 			     struct __gconv_step_data *step_data,
 			     const unsigned char **inptrp,
 			     const unsigned char *inend,
-			     unsigned char **outptrp, unsigned char *outend,
+			     unsigned char **outptrp,
+			     const unsigned char *outend,
 			     size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
@@ -601,7 +607,7 @@ __attribute ((always_inline))
 ucs4le_internal_loop (struct __gconv_step *step,
 		      struct __gconv_step_data *step_data,
 		      const unsigned char **inptrp, const unsigned char *inend,
-		      unsigned char **outptrp, unsigned char *outend,
+		      unsigned char **outptrp, const unsigned char *outend,
 		      size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -671,7 +677,8 @@ ucs4le_internal_loop_unaligned (struct __gconv_step *step,
 				struct __gconv_step_data *step_data,
 				const unsigned char **inptrp,
 				const unsigned char *inend,
-				unsigned char **outptrp, unsigned char *outend,
+				unsigned char **outptrp,
+				const unsigned char *outend,
 				size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -745,7 +752,8 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 			     struct __gconv_step_data *step_data,
 			     const unsigned char **inptrp,
 			     const unsigned char *inend,
-			     unsigned char **outptrp, unsigned char *outend,
+			     unsigned char **outptrp,
+			     const unsigned char *outend,
 			     size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
diff --git a/iconv/skeleton.c b/iconv/skeleton.c
index cc39fdcc70..18890385e7 100644
--- a/iconv/skeleton.c
+++ b/iconv/skeleton.c
@@ -83,6 +83,11 @@
      RESET_INPUT_BUFFER	If the input character sets allow this the macro
 			can be defined to reset the input buffer pointers
 			to cover only those characters up to the error.
+			Note that if the conversion has skipped over
+			irreversible characters (due to
+			__GCONV_IGNORE_ERRORS) there is no longer a direct
+			correspondence between input and output pointers,
+			and this macro is not used.
 
      FUNCTION_NAME	if not set the conversion function is named `gconv'.
 
@@ -597,6 +602,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 	  inptr = *inptrp;
 	  /* The outbuf buffer is empty.  */
 	  outstart = outbuf;
+#ifdef RESET_INPUT_BUFFER
+	  /* Remember how many irreversible characters were skipped before
+	     this round.  */
+	  size_t loop_irreversible
+	    = lirreversible + (irreversible ? *irreversible : 0);
+#endif
 
 #ifdef SAVE_RESET_STATE
 	  SAVE_RESET_STATE (1);
@@ -671,8 +682,16 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 		  if (__glibc_unlikely (outerr != outbuf))
 		    {
 #ifdef RESET_INPUT_BUFFER
-		      RESET_INPUT_BUFFER;
-#else
+		      /* RESET_INPUT_BUFFER can only work when there were
+			 no new irreversible characters skipped during
+			 this round.  */
+		      if (loop_irreversible
+			  == lirreversible + (irreversible ? *irreversible : 0))
+			{
+			  RESET_INPUT_BUFFER;
+			  goto done_reset;
+			}
+#endif
 		      /* We have a problem in one of the functions below.
 			 Undo the conversion upto the error point.  */
 		      size_t nstatus __attribute__ ((unused));
@@ -682,9 +701,9 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 		      outbuf = outstart;
 
 		      /* Restore the state.  */
-# ifdef SAVE_RESET_STATE
+#ifdef SAVE_RESET_STATE
 		      SAVE_RESET_STATE (0);
-# endif
+#endif
 
 		      if (__glibc_likely (!unaligned))
 			{
@@ -701,7 +720,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 					       lirreversiblep
 					       EXTRA_LOOP_ARGS);
 			}
-# if POSSIBLY_UNALIGNED
+#if POSSIBLY_UNALIGNED
 		      else
 			{
 			  if (FROM_DIRECTION)
@@ -720,7 +739,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 							       lirreversiblep
 							       EXTRA_LOOP_ARGS);
 			}
-# endif
+#endif
 
 		      /* We must run out of output buffer space in this
 			 rerun.  */
@@ -731,9 +750,11 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 			 the invocation counter.  */
 		      if (__glibc_unlikely (outbuf == outstart))
 			--data->__invocation_counter;
-#endif	/* reset input buffer */
 		    }
 
+#ifdef RESET_INPUT_BUFFER
+		done_reset:
+#endif
 		  /* Change the status.  */
 		  status = result;
 		}
diff --git a/iconv/tst-iconv7.c b/iconv/tst-iconv7.c
new file mode 100644
index 0000000000..10372bf79f
--- /dev/null
+++ b/iconv/tst-iconv7.c
@@ -0,0 +1,55 @@
+/* Test iconv buffer handling with the IGNORE error handler.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Derived from BZ #18830 */
+#include <errno.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+  /* This conversion needs two steps, from ASCII to INTERNAL to ASCII.  */
+  iconv_t cd = iconv_open ("ASCII//IGNORE", "ASCII");
+  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
+
+  /* Convert some irreversible sequence, enough to trigger an overflow of
+     the output buffer before the irreversible character in the second
+     step, but after going past the irreversible character in the first
+     step.  */
+  char input[4 + 4] = { '0', '1', '2', '3', '4', '5', '\266', '7' };
+  char *inptr = input;
+  size_t insize = sizeof (input);
+  char output[4];
+  char *outptr = output;
+  size_t outsize = sizeof (output);
+
+  /* The conversion should fail.  */
+  TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1);
+  TEST_VERIFY (errno == E2BIG);
+  /* The conversion should not consume more than it was able to store in
+     the output buffer.  */
+  TEST_COMPARE (inptr - input, sizeof (output) - outsize);
+
+  TEST_VERIFY_EXIT (iconv_close (cd) != -1);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/s390/multiarch/gconv_simple.c b/sysdeps/s390/multiarch/gconv_simple.c
index ce7bcf541e..2861b6dacb 100644
--- a/sysdeps/s390/multiarch/gconv_simple.c
+++ b/sysdeps/s390/multiarch/gconv_simple.c
@@ -404,7 +404,7 @@ ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
 				      const unsigned char **inptrp,
 				      const unsigned char *inend,
 				      unsigned char **outptrp,
-				      unsigned char *outend,
+				      const unsigned char *outend,
 				      size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -504,7 +504,7 @@ ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
 				    const unsigned char **inptrp,
 				    const unsigned char *inend,
 				    unsigned char **outptrp,
-				    unsigned char *outend,
+				    const unsigned char *outend,
 				    size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -631,7 +631,7 @@ ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
 				      const unsigned char **inptrp,
 				      const unsigned char *inend,
 				      unsigned char **outptrp,
-				      unsigned char *outend,
+				      const unsigned char *outend,
 				      size_t *irreversible)
 {
   int flags = step_data->__flags;
Carlos O'Donell June 4, 2019, 2:25 a.m. UTC | #3
On 5/28/19 10:16 AM, Andreas Schwab wrote:
> On Mai 27 2019, Carlos O'Donell <carlos@redhat.com> wrote:
> 
>>> @@ -745,7 +752,8 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
>>>   			     struct __gconv_step_data *step_data,
>>>   			     const unsigned char **inptrp,
>>>   			     const unsigned char *inend,
>>> -			     unsigned char **outptrp, unsigned char *outend,
>>> +			     unsigned char **outptrp,
>>> +			     const unsigned char *outend,
>>>   			     size_t *irreversible)
>>
>> Why are we changing this in this patch?
> 
> They don't conform to the interface defined by iconv/loop.c, last
> changed by commit 17427edd1f.  It wasn't a problem before, because noone
> has passed a const pointer.

Thanks.

>> This enables a lot of code which was previously only for no definition of
>> RESET_INPUT_BUFFER. Why do you make this change?
> 
> Because without being able to use RESET_INPUT_BUFFER we need to use the
> fallback, as if RESET_INPUT_BUFFER was undefined.

Right.

> Note that none of our conversion modules actually define
> RESET_INPUT_BUFFER, so only the default definition in skeleton.c is ever
> used, if at all.

Oh, wow, you're right, I hadn't noticed that.

> Andreas.
> 
> 	[BZ #18830]
> 	* iconv/skeleton.c (FUNCTION_NAME): Use RESET_INPUT_BUFFER only if
> 	no irreversible characters occurred.
> 	* iconv/gconv_simple.c (internal_ucs4_loop)
> 	(internal_ucs4_loop_unaligned, internal_ucs4_loop_single)
> 	(ucs4_internal_loop, ucs4_internal_loop_unaligned)
> 	(ucs4_internal_loop_single, internal_ucs4le_loop)
> 	(internal_ucs4le_loop_unaligned, internal_ucs4le_loop_single)
> 	(ucs4le_internal_loop, ucs4le_internal_loop_unaligned)
> 	(ucs4le_internal_loop_single): Add const to outend.
> 	* sysdeps/s390/multiarch/gconv_simple.c (internal_ucs4le_loop)
> 	(ucs4_internal_loop, ucs4le_internal_loop): Likewise.
> 	* iconv/Makefile (tests): Add tst-iconv7.
> 	* iconv/tst-iconv7.c: New file.

OK for master if you tweak the RESET_INPUT_BUFFER comment.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>

> ---
>   iconv/Makefile                        |  2 +-
>   iconv/gconv_simple.c                  | 32 ++++++++++------
>   iconv/skeleton.c                      | 35 +++++++++++++----
>   iconv/tst-iconv7.c                    | 55 +++++++++++++++++++++++++++
>   sysdeps/s390/multiarch/gconv_simple.c |  6 +--
>   5 files changed, 107 insertions(+), 23 deletions(-)
>   create mode 100644 iconv/tst-iconv7.c
> 
> diff --git a/iconv/Makefile b/iconv/Makefile
> index f6631e861d..74cd9bf860 100644
> --- a/iconv/Makefile
> +++ b/iconv/Makefile
> @@ -44,7 +44,7 @@ CFLAGS-linereader.c += -DNO_TRANSLITERATION
>   CFLAGS-simple-hash.c += -I../locale
>   
>   tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
> -	  tst-iconv-mt
> +	  tst-iconv7 tst-iconv-mt

OK.

>   
>   others		= iconv_prog iconvconfig
>   install-others-programs	= $(inst_bindir)/iconv
> diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
> index 35aaa8aacd..75ce8fb1f4 100644
> --- a/iconv/gconv_simple.c
> +++ b/iconv/gconv_simple.c
> @@ -76,7 +76,7 @@ __attribute ((always_inline))
>   internal_ucs4_loop (struct __gconv_step *step,
>   		    struct __gconv_step_data *step_data,
>   		    const unsigned char **inptrp, const unsigned char *inend,
> -		    unsigned char **outptrp, unsigned char *outend,
> +		    unsigned char **outptrp, const unsigned char *outend,
>   		    size_t *irreversible)
>   {
>     const unsigned char *inptr = *inptrp;
> @@ -120,7 +120,8 @@ internal_ucs4_loop_unaligned (struct __gconv_step *step,
>   			      struct __gconv_step_data *step_data,
>   			      const unsigned char **inptrp,
>   			      const unsigned char *inend,
> -			      unsigned char **outptrp, unsigned char *outend,
> +			      unsigned char **outptrp,
> +			      const unsigned char *outend,
>   			      size_t *irreversible)
>   {
>     const unsigned char *inptr = *inptrp;
> @@ -169,7 +170,8 @@ internal_ucs4_loop_single (struct __gconv_step *step,
>   			   struct __gconv_step_data *step_data,
>   			   const unsigned char **inptrp,
>   			   const unsigned char *inend,
> -			   unsigned char **outptrp, unsigned char *outend,
> +			   unsigned char **outptrp,
> +			   const unsigned char *outend,
>   			   size_t *irreversible)
>   {
>     mbstate_t *state = step_data->__statep;
> @@ -231,7 +233,7 @@ __attribute ((always_inline))
>   ucs4_internal_loop (struct __gconv_step *step,
>   		    struct __gconv_step_data *step_data,
>   		    const unsigned char **inptrp, const unsigned char *inend,
> -		    unsigned char **outptrp, unsigned char *outend,
> +		    unsigned char **outptrp, const unsigned char *outend,
>   		    size_t *irreversible)
>   {
>     int flags = step_data->__flags;
> @@ -298,7 +300,8 @@ ucs4_internal_loop_unaligned (struct __gconv_step *step,
>   			      struct __gconv_step_data *step_data,
>   			      const unsigned char **inptrp,
>   			      const unsigned char *inend,
> -			      unsigned char **outptrp, unsigned char *outend,
> +			      unsigned char **outptrp,
> +			      const unsigned char *outend,
>   			      size_t *irreversible)
>   {
>     int flags = step_data->__flags;
> @@ -368,7 +371,8 @@ ucs4_internal_loop_single (struct __gconv_step *step,
>   			   struct __gconv_step_data *step_data,
>   			   const unsigned char **inptrp,
>   			   const unsigned char *inend,
> -			   unsigned char **outptrp, unsigned char *outend,
> +			   unsigned char **outptrp,
> +			   const unsigned char *outend,
>   			   size_t *irreversible)
>   {
>     mbstate_t *state = step_data->__statep;
> @@ -443,7 +447,7 @@ __attribute ((always_inline))
>   internal_ucs4le_loop (struct __gconv_step *step,
>   		      struct __gconv_step_data *step_data,
>   		      const unsigned char **inptrp, const unsigned char *inend,
> -		      unsigned char **outptrp, unsigned char *outend,
> +		      unsigned char **outptrp, const unsigned char *outend,
>   		      size_t *irreversible)
>   {
>     const unsigned char *inptr = *inptrp;
> @@ -488,7 +492,8 @@ internal_ucs4le_loop_unaligned (struct __gconv_step *step,
>   				struct __gconv_step_data *step_data,
>   				const unsigned char **inptrp,
>   				const unsigned char *inend,
> -				unsigned char **outptrp, unsigned char *outend,
> +				unsigned char **outptrp,
> +				const unsigned char *outend,
>   				size_t *irreversible)
>   {
>     const unsigned char *inptr = *inptrp;
> @@ -540,7 +545,8 @@ internal_ucs4le_loop_single (struct __gconv_step *step,
>   			     struct __gconv_step_data *step_data,
>   			     const unsigned char **inptrp,
>   			     const unsigned char *inend,
> -			     unsigned char **outptrp, unsigned char *outend,
> +			     unsigned char **outptrp,
> +			     const unsigned char *outend,
>   			     size_t *irreversible)
>   {
>     mbstate_t *state = step_data->__statep;
> @@ -601,7 +607,7 @@ __attribute ((always_inline))
>   ucs4le_internal_loop (struct __gconv_step *step,
>   		      struct __gconv_step_data *step_data,
>   		      const unsigned char **inptrp, const unsigned char *inend,
> -		      unsigned char **outptrp, unsigned char *outend,
> +		      unsigned char **outptrp, const unsigned char *outend,
>   		      size_t *irreversible)
>   {
>     int flags = step_data->__flags;
> @@ -671,7 +677,8 @@ ucs4le_internal_loop_unaligned (struct __gconv_step *step,
>   				struct __gconv_step_data *step_data,
>   				const unsigned char **inptrp,
>   				const unsigned char *inend,
> -				unsigned char **outptrp, unsigned char *outend,
> +				unsigned char **outptrp,
> +				const unsigned char *outend,
>   				size_t *irreversible)
>   {
>     int flags = step_data->__flags;
> @@ -745,7 +752,8 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
>   			     struct __gconv_step_data *step_data,
>   			     const unsigned char **inptrp,
>   			     const unsigned char *inend,
> -			     unsigned char **outptrp, unsigned char *outend,
> +			     unsigned char **outptrp,
> +			     const unsigned char *outend,
>   			     size_t *irreversible)

OK.

>   {
>     mbstate_t *state = step_data->__statep;
> diff --git a/iconv/skeleton.c b/iconv/skeleton.c
> index cc39fdcc70..18890385e7 100644
> --- a/iconv/skeleton.c
> +++ b/iconv/skeleton.c
> @@ -83,6 +83,11 @@
>        RESET_INPUT_BUFFER	If the input character sets allow this the macro
>   			can be defined to reset the input buffer pointers
>   			to cover only those characters up to the error.
> +			Note that if the conversion has skipped over
> +			irreversible characters (due to
> +			__GCONV_IGNORE_ERRORS) there is no longer a direct
> +			correspondence between input and output pointers,
> +			and this macro is not used.

There is room for confusion here. The phrase "macro is not used" would in
my opinion be reserved for cases where the macro was not processed or
expanded at all. We process and expand the macro (if it's defined), but the
resulting expansion is unused if there were irreversible characters.

Suggest:
			Note that if the conversion has skipped over
			irreversible characters (due to
			__GCONV_IGNORE_ERRORS) there is no longer a direct
			correspondence between input and output pointers,
			and this macro function is not called.

OK with that change e.g. s/is not used/function is not called/g.

>   
>        FUNCTION_NAME	if not set the conversion function is named `gconv'.
>   
> @@ -597,6 +602,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>   	  inptr = *inptrp;
>   	  /* The outbuf buffer is empty.  */
>   	  outstart = outbuf;
> +#ifdef RESET_INPUT_BUFFER
> +	  /* Remember how many irreversible characters were skipped before
> +	     this round.  */
> +	  size_t loop_irreversible
> +	    = lirreversible + (irreversible ? *irreversible : 0);
> +#endif

OK. Thanks for the comment.

>   
>   #ifdef SAVE_RESET_STATE
>   	  SAVE_RESET_STATE (1);
> @@ -671,8 +682,16 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>   		  if (__glibc_unlikely (outerr != outbuf))
>   		    {
>   #ifdef RESET_INPUT_BUFFER
> -		      RESET_INPUT_BUFFER;
> -#else
> +		      /* RESET_INPUT_BUFFER can only work when there were
> +			 no new irreversible characters skipped during
> +			 this round.  */
> +		      if (loop_irreversible
> +			  == lirreversible + (irreversible ? *irreversible : 0))
> +			{
> +			  RESET_INPUT_BUFFER;
> +			  goto done_reset;
> +			}
> +#endif

OK.

>   		      /* We have a problem in one of the functions below.
>   			 Undo the conversion upto the error point.  */
>   		      size_t nstatus __attribute__ ((unused));
> @@ -682,9 +701,9 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>   		      outbuf = outstart;
>   
>   		      /* Restore the state.  */
> -# ifdef SAVE_RESET_STATE
> +#ifdef SAVE_RESET_STATE
>   		      SAVE_RESET_STATE (0);
> -# endif
> +#endif

OK.

>   
>   		      if (__glibc_likely (!unaligned))
>   			{
> @@ -701,7 +720,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>   					       lirreversiblep
>   					       EXTRA_LOOP_ARGS);
>   			}
> -# if POSSIBLY_UNALIGNED
> +#if POSSIBLY_UNALIGNED
>   		      else
>   			{
>   			  if (FROM_DIRECTION)
> @@ -720,7 +739,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>   							       lirreversiblep
>   							       EXTRA_LOOP_ARGS);
>   			}
> -# endif
> +#endif
>   
>   		      /* We must run out of output buffer space in this
>   			 rerun.  */
> @@ -731,9 +750,11 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
>   			 the invocation counter.  */
>   		      if (__glibc_unlikely (outbuf == outstart))
>   			--data->__invocation_counter;
> -#endif	/* reset input buffer */
>   		    }
>   
> +#ifdef RESET_INPUT_BUFFER
> +		done_reset:
> +#endif
>   		  /* Change the status.  */
>   		  status = result;

OK.

>   		}
> diff --git a/iconv/tst-iconv7.c b/iconv/tst-iconv7.c
> new file mode 100644
> index 0000000000..10372bf79f
> --- /dev/null
> +++ b/iconv/tst-iconv7.c
> @@ -0,0 +1,55 @@
> +/* Test iconv buffer handling with the IGNORE error handler.
> +   Copyright (C) 2019 Free Software Foundation, Inc.

OK.

> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* Derived from BZ #18830 */
> +#include <errno.h>
> +#include <iconv.h>
> +#include <stdio.h>
> +#include <support/check.h>
> +
> +static int
> +do_test (void)
> +{
> +  /* This conversion needs two steps, from ASCII to INTERNAL to ASCII.  */
> +  iconv_t cd = iconv_open ("ASCII//IGNORE", "ASCII");
> +  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
> +
> +  /* Convert some irreversible sequence, enough to trigger an overflow of
> +     the output buffer before the irreversible character in the second
> +     step, but after going past the irreversible character in the first
> +     step.  */
> +  char input[4 + 4] = { '0', '1', '2', '3', '4', '5', '\266', '7' };

OK.

> +  char *inptr = input;
> +  size_t insize = sizeof (input);
> +  char output[4];
> +  char *outptr = output;
> +  size_t outsize = sizeof (output);
> +
> +  /* The conversion should fail.  */
> +  TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1);
> +  TEST_VERIFY (errno == E2BIG);
> +  /* The conversion should not consume more than it was able to store in
> +     the output buffer.  */
> +  TEST_COMPARE (inptr - input, sizeof (output) - outsize);

OK. Good comments.

> +
> +  TEST_VERIFY_EXIT (iconv_close (cd) != -1);
> +
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/s390/multiarch/gconv_simple.c b/sysdeps/s390/multiarch/gconv_simple.c
> index ce7bcf541e..2861b6dacb 100644
> --- a/sysdeps/s390/multiarch/gconv_simple.c
> +++ b/sysdeps/s390/multiarch/gconv_simple.c
> @@ -404,7 +404,7 @@ ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
>   				      const unsigned char **inptrp,
>   				      const unsigned char *inend,
>   				      unsigned char **outptrp,
> -				      unsigned char *outend,
> +				      const unsigned char *outend,
>   				      size_t *irreversible)
>   {
>     const unsigned char *inptr = *inptrp;
> @@ -504,7 +504,7 @@ ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
>   				    const unsigned char **inptrp,
>   				    const unsigned char *inend,
>   				    unsigned char **outptrp,
> -				    unsigned char *outend,
> +				    const unsigned char *outend,
>   				    size_t *irreversible)
>   {
>     int flags = step_data->__flags;
> @@ -631,7 +631,7 @@ ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
>   				      const unsigned char **inptrp,
>   				      const unsigned char *inend,
>   				      unsigned char **outptrp,
> -				      unsigned char *outend,
> +				      const unsigned char *outend,
>   				      size_t *irreversible)
>   {
>     int flags = step_data->__flags;
> 

OK.
diff mbox series

Patch

diff --git a/iconv/Makefile b/iconv/Makefile
index f6631e861d..74cd9bf860 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -44,7 +44,7 @@  CFLAGS-linereader.c += -DNO_TRANSLITERATION
 CFLAGS-simple-hash.c += -I../locale
 
 tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
-	  tst-iconv-mt
+	  tst-iconv7 tst-iconv-mt
 
 others		= iconv_prog iconvconfig
 install-others-programs	= $(inst_bindir)/iconv
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 35aaa8aacd..75ce8fb1f4 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -76,7 +76,7 @@  __attribute ((always_inline))
 internal_ucs4_loop (struct __gconv_step *step,
 		    struct __gconv_step_data *step_data,
 		    const unsigned char **inptrp, const unsigned char *inend,
-		    unsigned char **outptrp, unsigned char *outend,
+		    unsigned char **outptrp, const unsigned char *outend,
 		    size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -120,7 +120,8 @@  internal_ucs4_loop_unaligned (struct __gconv_step *step,
 			      struct __gconv_step_data *step_data,
 			      const unsigned char **inptrp,
 			      const unsigned char *inend,
-			      unsigned char **outptrp, unsigned char *outend,
+			      unsigned char **outptrp,
+			      const unsigned char *outend,
 			      size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -169,7 +170,8 @@  internal_ucs4_loop_single (struct __gconv_step *step,
 			   struct __gconv_step_data *step_data,
 			   const unsigned char **inptrp,
 			   const unsigned char *inend,
-			   unsigned char **outptrp, unsigned char *outend,
+			   unsigned char **outptrp,
+			   const unsigned char *outend,
 			   size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
@@ -231,7 +233,7 @@  __attribute ((always_inline))
 ucs4_internal_loop (struct __gconv_step *step,
 		    struct __gconv_step_data *step_data,
 		    const unsigned char **inptrp, const unsigned char *inend,
-		    unsigned char **outptrp, unsigned char *outend,
+		    unsigned char **outptrp, const unsigned char *outend,
 		    size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -298,7 +300,8 @@  ucs4_internal_loop_unaligned (struct __gconv_step *step,
 			      struct __gconv_step_data *step_data,
 			      const unsigned char **inptrp,
 			      const unsigned char *inend,
-			      unsigned char **outptrp, unsigned char *outend,
+			      unsigned char **outptrp,
+			      const unsigned char *outend,
 			      size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -368,7 +371,8 @@  ucs4_internal_loop_single (struct __gconv_step *step,
 			   struct __gconv_step_data *step_data,
 			   const unsigned char **inptrp,
 			   const unsigned char *inend,
-			   unsigned char **outptrp, unsigned char *outend,
+			   unsigned char **outptrp,
+			   const unsigned char *outend,
 			   size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
@@ -443,7 +447,7 @@  __attribute ((always_inline))
 internal_ucs4le_loop (struct __gconv_step *step,
 		      struct __gconv_step_data *step_data,
 		      const unsigned char **inptrp, const unsigned char *inend,
-		      unsigned char **outptrp, unsigned char *outend,
+		      unsigned char **outptrp, const unsigned char *outend,
 		      size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -488,7 +492,8 @@  internal_ucs4le_loop_unaligned (struct __gconv_step *step,
 				struct __gconv_step_data *step_data,
 				const unsigned char **inptrp,
 				const unsigned char *inend,
-				unsigned char **outptrp, unsigned char *outend,
+				unsigned char **outptrp,
+				const unsigned char *outend,
 				size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -540,7 +545,8 @@  internal_ucs4le_loop_single (struct __gconv_step *step,
 			     struct __gconv_step_data *step_data,
 			     const unsigned char **inptrp,
 			     const unsigned char *inend,
-			     unsigned char **outptrp, unsigned char *outend,
+			     unsigned char **outptrp,
+			     const unsigned char *outend,
 			     size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
@@ -601,7 +607,7 @@  __attribute ((always_inline))
 ucs4le_internal_loop (struct __gconv_step *step,
 		      struct __gconv_step_data *step_data,
 		      const unsigned char **inptrp, const unsigned char *inend,
-		      unsigned char **outptrp, unsigned char *outend,
+		      unsigned char **outptrp, const unsigned char *outend,
 		      size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -671,7 +677,8 @@  ucs4le_internal_loop_unaligned (struct __gconv_step *step,
 				struct __gconv_step_data *step_data,
 				const unsigned char **inptrp,
 				const unsigned char *inend,
-				unsigned char **outptrp, unsigned char *outend,
+				unsigned char **outptrp,
+				const unsigned char *outend,
 				size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -745,7 +752,8 @@  ucs4le_internal_loop_single (struct __gconv_step *step,
 			     struct __gconv_step_data *step_data,
 			     const unsigned char **inptrp,
 			     const unsigned char *inend,
-			     unsigned char **outptrp, unsigned char *outend,
+			     unsigned char **outptrp,
+			     const unsigned char *outend,
 			     size_t *irreversible)
 {
   mbstate_t *state = step_data->__statep;
diff --git a/iconv/skeleton.c b/iconv/skeleton.c
index cc39fdcc70..d43fac7683 100644
--- a/iconv/skeleton.c
+++ b/iconv/skeleton.c
@@ -597,6 +597,10 @@  FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 	  inptr = *inptrp;
 	  /* The outbuf buffer is empty.  */
 	  outstart = outbuf;
+#ifdef RESET_INPUT_BUFFER
+	  size_t loop_irreversible
+	    = lirreversible + (irreversible ? *irreversible : 0);
+#endif
 
 #ifdef SAVE_RESET_STATE
 	  SAVE_RESET_STATE (1);
@@ -671,8 +675,16 @@  FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 		  if (__glibc_unlikely (outerr != outbuf))
 		    {
 #ifdef RESET_INPUT_BUFFER
-		      RESET_INPUT_BUFFER;
-#else
+		      if (loop_irreversible
+			  == lirreversible + (irreversible ? *irreversible : 0))
+			{
+			  /* RESET_INPUT_BUFFER can only work if there
+			     were no irreversible characters during the
+			     last loop.  */
+			  RESET_INPUT_BUFFER;
+			  goto done_reset;
+			}
+#endif
 		      /* We have a problem in one of the functions below.
 			 Undo the conversion upto the error point.  */
 		      size_t nstatus __attribute__ ((unused));
@@ -682,9 +694,9 @@  FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 		      outbuf = outstart;
 
 		      /* Restore the state.  */
-# ifdef SAVE_RESET_STATE
+#ifdef SAVE_RESET_STATE
 		      SAVE_RESET_STATE (0);
-# endif
+#endif
 
 		      if (__glibc_likely (!unaligned))
 			{
@@ -701,7 +713,7 @@  FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 					       lirreversiblep
 					       EXTRA_LOOP_ARGS);
 			}
-# if POSSIBLY_UNALIGNED
+#if POSSIBLY_UNALIGNED
 		      else
 			{
 			  if (FROM_DIRECTION)
@@ -720,7 +732,7 @@  FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 							       lirreversiblep
 							       EXTRA_LOOP_ARGS);
 			}
-# endif
+#endif
 
 		      /* We must run out of output buffer space in this
 			 rerun.  */
@@ -731,9 +743,11 @@  FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
 			 the invocation counter.  */
 		      if (__glibc_unlikely (outbuf == outstart))
 			--data->__invocation_counter;
-#endif	/* reset input buffer */
 		    }
 
+#ifdef RESET_INPUT_BUFFER
+		done_reset:
+#endif
 		  /* Change the status.  */
 		  status = result;
 		}
diff --git a/iconv/tst-iconv7.c b/iconv/tst-iconv7.c
new file mode 100644
index 0000000000..ee793d2f3e
--- /dev/null
+++ b/iconv/tst-iconv7.c
@@ -0,0 +1,67 @@ 
+/* Test iconv buffer handling with the IGNORE error handler.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Derived from BZ #18830 */
+#include <errno.h>
+#include <iconv.h>
+#include <stdio.h>
+
+
+static int
+do_test (void)
+{
+  iconv_t cd = iconv_open ("ASCII//IGNORE", "ASCII");
+  if (cd == (iconv_t) -1)
+    {
+      puts ("iconv_open failed");
+      return 1;
+    }
+
+  char input[5 + 3] = { 0, 0, 0, 0, 0, '1', '\200', '2' };
+  char *inptr = input;
+  size_t insize = sizeof (input);
+  char output[5];
+  char *outptr = output;
+  size_t outsize = sizeof (output);
+
+  size_t ret = iconv (cd, &inptr, &insize, &outptr, &outsize);
+  if (ret != (size_t) -1)
+    {
+      puts ("iconv succeeded");
+      return 1;
+    }
+  if (errno != E2BIG)
+    {
+      puts ("iconv did not set errno to E2BIG");
+      return 1;
+    }
+  if (inptr != input + sizeof (output) - outsize)
+    {
+      printf ("iconv consumed %td characters\n", inptr - input);
+      return 1;
+    }
+
+  if (iconv_close (cd) == -1)
+    {
+      puts ("iconv_close failed");
+      return 1;
+    }
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/s390/multiarch/gconv_simple.c b/sysdeps/s390/multiarch/gconv_simple.c
index ce7bcf541e..2861b6dacb 100644
--- a/sysdeps/s390/multiarch/gconv_simple.c
+++ b/sysdeps/s390/multiarch/gconv_simple.c
@@ -404,7 +404,7 @@  ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
 				      const unsigned char **inptrp,
 				      const unsigned char *inend,
 				      unsigned char **outptrp,
-				      unsigned char *outend,
+				      const unsigned char *outend,
 				      size_t *irreversible)
 {
   const unsigned char *inptr = *inptrp;
@@ -504,7 +504,7 @@  ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
 				    const unsigned char **inptrp,
 				    const unsigned char *inend,
 				    unsigned char **outptrp,
-				    unsigned char *outend,
+				    const unsigned char *outend,
 				    size_t *irreversible)
 {
   int flags = step_data->__flags;
@@ -631,7 +631,7 @@  ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
 				      const unsigned char **inptrp,
 				      const unsigned char *inend,
 				      unsigned char **outptrp,
-				      unsigned char *outend,
+				      const unsigned char *outend,
 				      size_t *irreversible)
 {
   int flags = step_data->__flags;