diff mbox

[AArch64] Fix subreg bug in scalar copysign

Message ID VI1PR0801MB2031C64CEFD6A5AB99C97737FF270@VI1PR0801MB2031.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Tamar Christina March 15, 2017, 4:04 p.m. UTC
Hi All, 

This fixes a bug in the scalar version of copysign where, due to a subreg,
we were generating less than efficient code.

This patch replaces

  return x * __builtin_copysignf (150.0f, y);

which used to generate

	adrp	x1, .LC1
	mov	x0, 2147483648
	ins	v3.d[0], x0
	ldr	s2, [x1, #:lo12:.LC1]
	bsl	v3.8b, v1.8b, v2.8b
	fmul	s0, s0, s3
	ret

.LC1:
	.word	1125515264

with
	mov	x0, 1125515264
	movi	v2.2s, 0x80, lsl 24
	fmov	d3, x0
	bit	v3.8b, v1.8b, v2.8b
	fmul	s0, s0, s3
	ret

removing the incorrect ins.

Regression tested on aarch64-none-linux-gnu with no regressions.

OK for trunk?

Thanks,
Tamar

gcc/
2017-03-15  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/aarch64.md
	(copysignsf3): Fix mask generation.

Comments

Tamar Christina May 2, 2017, 9:08 a.m. UTC | #1
Ping
Tamar Christina May 15, 2017, 8:30 a.m. UTC | #2
ping
James Greenhalgh June 6, 2017, 1:34 p.m. UTC | #3
On Wed, Mar 15, 2017 at 04:04:35PM +0000, Tamar Christina wrote:
> Hi All, 
> 
> This fixes a bug in the scalar version of copysign where, due to a subreg,
> we were generating less than efficient code.
> 
> This patch replaces
> 
>   return x * __builtin_copysignf (150.0f, y);
> 
> which used to generate
> 
> 	adrp	x1, .LC1
> 	mov	x0, 2147483648
> 	ins	v3.d[0], x0
> 	ldr	s2, [x1, #:lo12:.LC1]
> 	bsl	v3.8b, v1.8b, v2.8b
> 	fmul	s0, s0, s3
> 	ret
> 
> .LC1:
> 	.word	1125515264
> 
> with
> 	mov	x0, 1125515264
> 	movi	v2.2s, 0x80, lsl 24
> 	fmov	d3, x0
> 	bit	v3.8b, v1.8b, v2.8b
> 	fmul	s0, s0, s3
> 	ret
> 
> removing the incorrect ins.
> 
> Regression tested on aarch64-none-linux-gnu and no regressions.
> 
> OK for trunk?

OK.

Thanks,
James

> gcc/
> 2017-03-15  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* config/aarch64/aarch64.md
> 	(copysignsf3): Fix mask generation.
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..435c8f50c0e521b3057c26a482315c5a82574711 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5030,14 +5030,16 @@ 
    (match_operand:SF 2 "register_operand")]
   "TARGET_FLOAT && TARGET_SIMD"
 {
-  rtx mask = gen_reg_rtx (DImode);
+  rtx v_bitmask = gen_reg_rtx (V2SImode);
 
   /* Juggle modes to get us in to a vector mode for BSL.  */
-  rtx op1 = lowpart_subreg (V2SFmode, operands[1], SFmode);
+  rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);
   rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 31));
-  emit_insn (gen_aarch64_simd_bslv2sf (tmp, mask, op2, op1));
+  emit_move_insn (v_bitmask,
+		  aarch64_simd_gen_const_vector_dup (V2SImode,
+						     HOST_WIDE_INT_M1U << 31));
+  emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
   emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
   DONE;
 }