From 03445cddc87a1ce67b00e3d40cc17be38e494a1d Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 27 Oct 2024 13:53:37 +0800
Subject: [PATCH v2] Optimize incoming integer argument promotion
For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
true, all integer arguments smaller than int are passed as int:
[hjl@gnu-tgl-3 pr14907]$ cat x.c
extern int baz (char c1);
int
foo (char c1)
{
return baz (c1);
}
[hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
[hjl@gnu-tgl-3 pr14907]$ cat x.s
.file "x.c"
.text
.p2align 4
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
movsbl 4(%esp), %eax
movl %eax, 4(%esp)
jmp baz
.cfi_endproc
.LFE0:
.size foo, .-foo
.ident "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-tgl-3 pr14907]$
But integer promotion:
movsbl 4(%esp), %eax
movl %eax, 4(%esp)
isn't necessary if incoming arguments and outgoing arguments are the
same. Use unpromoted incoming integer arguments as outgoing arguments
if incoming integer arguments are the same as outgoing arguments to
avoid unnecessary integer promotion.
NB: This optimization only applies to calling external functions since
for
static void foo (char c) { ... }
void bar (char c) { foo (c); }
the existing combine optimization assumes that all callers of the local
function, foo, will promote the outgoing char argument to int. Since
we don't control all callers of the global function, bar, we must extend
bar's outgoing char argument to int when calling foo.
gcc/
PR middle-end/14907
* calls.cc: Include "ssa.h", "tree-ssa-live.h" and
"tree-outof-ssa.h".
(arg_data): Add unpromoted_int_parm_rtx.
(precompute_register_parameters): Use unpromoted_int_parm_rtx
as argument value if available.
(get_unpromoted_int_parm_rtx_from_ssa_name): New function.
(get_unpromoted_int_parm_rtx): Likewise.
(initialize_argument_information): Add an argument for function
parameter types. Set unpromoted_int_parm_rtx if integer function
arguments of an external function are promoted to int. Change
mode, reg and tail_call_reg to the same mode as
unpromoted_int_parm_rtx.
(expand_call): Pass type_arg_types to
initialize_argument_information.
(store_one_arg): Use unpromoted_int_parm_rtx as argument value
if available.
gcc/testsuite/
PR middle-end/14907
* gcc.target/i386/pr14907-1.c: New test.
* gcc.target/i386/pr14907-2.c: Likewise.
* gcc.target/i386/pr14907-3.c: Likewise.
* gcc.target/i386/pr14907-4.c: Likewise.
* gcc.target/i386/pr14907-5.c: Likewise.
* gcc.target/i386/pr14907-6.c: Likewise.
* gcc.target/i386/pr14907-7.c: Likewise.
* gcc.target/i386/pr14907-8.c: Likewise.
* gcc.target/i386/pr14907-9.c: Likewise.
* gcc.target/i386/pr14907-10.c: Likewise.
* gcc.target/i386/pr14907-11.c: Likewise.
* gcc.target/i386/pr14907-12.c: Likewise.
* gcc.target/i386/pr14907-13.c: Likewise.
* gcc.target/i386/pr14907-14.c: Likewise.
* gcc.target/i386/pr14907-15.c: Likewise.
---
gcc/calls.cc | 155 +++++++++++++++++++--
gcc/testsuite/gcc.target/i386/pr14907-1.c | 21 +++
gcc/testsuite/gcc.target/i386/pr14907-10.c | 23 +++
gcc/testsuite/gcc.target/i386/pr14907-11.c | 12 ++
gcc/testsuite/gcc.target/i386/pr14907-12.c | 17 +++
gcc/testsuite/gcc.target/i386/pr14907-13.c | 12 ++
gcc/testsuite/gcc.target/i386/pr14907-14.c | 17 +++
gcc/testsuite/gcc.target/i386/pr14907-15.c | 27 ++++
gcc/testsuite/gcc.target/i386/pr14907-2.c | 21 +++
gcc/testsuite/gcc.target/i386/pr14907-3.c | 21 +++
gcc/testsuite/gcc.target/i386/pr14907-4.c | 21 +++
gcc/testsuite/gcc.target/i386/pr14907-5.c | 21 +++
gcc/testsuite/gcc.target/i386/pr14907-6.c | 21 +++
gcc/testsuite/gcc.target/i386/pr14907-7.c | 22 +++
gcc/testsuite/gcc.target/i386/pr14907-8.c | 23 +++
gcc/testsuite/gcc.target/i386/pr14907-9.c | 22 +++
16 files changed, 448 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-9.c
@@ -62,6 +62,9 @@ along with GCC; see the file COPYING3. If not see
#include "value-query.h"
#include "tree-pretty-print.h"
#include "tree-eh.h"
+#include "ssa.h"
+#include "tree-ssa-live.h"
+#include "tree-outof-ssa.h"
/* Like PREFERRED_STACK_BOUNDARY but in units of bytes, not bits. */
#define STACK_BYTES (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
@@ -76,6 +79,10 @@ struct arg_data
machine_mode mode;
/* Current RTL value for argument, or 0 if it isn't precomputed. */
rtx value;
+ /* Unpromoted incoming integer argument RTL value to use as the outgoing
+ argument, or 0 if the outgoing argument isn't a promoted incoming
+ integer argument. */
+ rtx unpromoted_int_parm_rtx;
/* Initially-compute RTL value for argument; only for const functions. */
rtx initial_value;
/* Register to pass this argument in, 0 if passed on stack, or an
@@ -1020,7 +1027,10 @@ precompute_register_parameters (int num_actuals, struct arg_data *args,
if (args[i].value == 0)
{
push_temp_slots ();
- args[i].value = expand_normal (args[i].tree_value);
+ if (args[i].unpromoted_int_parm_rtx)
+ args[i].value = args[i].unpromoted_int_parm_rtx;
+ else
+ args[i].value = expand_normal (args[i].tree_value);
preserve_temp_slots (args[i].value);
pop_temp_slots ();
}
@@ -1281,6 +1291,63 @@ maybe_complain_about_tail_call (tree call_expr, const char *reason)
CALL_EXPR_MUST_TAIL_CALL (call_expr) = 0;
}
+/* Return the unpromoted incoming argument rtx for SSA name ARG if ARG is
+   based on an integer function parameter that was promoted on entry (the
+   mode of its rtx differs from the mode of its DECL_ARG_TYPE) and that
+   rtx can be passed as is for an outgoing argument of type ARGTYPE.
+   This requires the mode of the rtx to be the mode of ARGTYPE, and the
+   declared type of the parameter to agree with ARGTYPE in signedness and
+   precision.  Otherwise, return nullptr.  */
+
+static rtx
+get_unpromoted_int_parm_rtx_from_ssa_name (tree argtype, tree arg)
+{
+  tree var = SSA_NAME_VAR (arg);
+  if (var == NULL_TREE || TREE_CODE (var) != PARM_DECL)
+    return nullptr;
+
+  rtx op = get_rtx_for_ssa_name (arg);
+  if (op == NULL_RTX)
+    return nullptr;
+
+  /* The parameter must have been promoted (its rtx mode differs from the
+     mode of the type it was passed in) and its rtx must already have the
+     mode of the outgoing argument.  */
+  machine_mode mode = GET_MODE (op);
+  if (mode == TYPE_MODE (DECL_ARG_TYPE (var))
+      || mode != TYPE_MODE (argtype))
+    return nullptr;
+
+  /* Equal modes do not imply an equal extension.  The bits above MODE
+     were produced by our caller according to the declared type of the
+     parameter; e.g. an unsigned char parameter was zero-extended and must
+     not be forwarded untouched to a callee parameter of type char, which
+     is expected to be sign-extended.  Only reuse the incoming value when
+     signedness and precision match.  */
+  tree parmtype = TREE_TYPE (var);
+  if (TYPE_UNSIGNED (parmtype) != TYPE_UNSIGNED (argtype)
+      || TYPE_PRECISION (parmtype) != TYPE_PRECISION (argtype))
+    return nullptr;
+
+  return op;
+}
+
+/* ARG is an outgoing call argument and ARGTYPE the type of the matching
+   parameter of the callee.  If ARG has an integer mode and is either the
+   default definition of an SSA name, or an SSA name defined by a NOP_EXPR
+   of such a default definition, hand that default definition to
+   get_unpromoted_int_parm_rtx_from_ssa_name and return its result.
+   In every other case return nullptr.  */
+
+static rtx
+get_unpromoted_int_parm_rtx (tree argtype, tree arg)
+{
+  /* Only integer-mode SSA names are candidates.  */
+  if (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (arg))) != MODE_INT
+      || TREE_CODE (arg) != SSA_NAME)
+    return nullptr;
+
+  tree parm_name = arg;
+  if (!SSA_NAME_IS_DEFAULT_DEF (arg))
+    {
+      /* Look through a single conversion of a default definition; give
+         up when no defining statement is available for ARG.  */
+      gimple *def_stmt = get_gimple_for_ssa_name (arg);
+      if (def_stmt == nullptr)
+        return nullptr;
+
+      gassign *conv = as_a<gassign *> (def_stmt);
+      if (gimple_assign_rhs_code (conv) != NOP_EXPR)
+        return nullptr;
+
+      parm_name = gimple_assign_rhs1 (conv);
+      if (TREE_CODE (parm_name) != SSA_NAME
+          || !SSA_NAME_IS_DEFAULT_DEF (parm_name))
+        return nullptr;
+    }
+
+  return get_unpromoted_int_parm_rtx_from_ssa_name (argtype, parm_name);
+}
+
+
/* Fill in ARGS_SIZE and ARGS array based on the parameters found in
CALL_EXPR EXP.
@@ -1326,7 +1393,8 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
rtx *old_stack_level,
poly_int64 *old_pending_adj,
bool *must_preallocate, int *ecf_flags,
- bool *may_tailcall, bool call_from_thunk_p)
+ bool *may_tailcall, bool call_from_thunk_p,
+ tree type_arg_types)
{
CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far);
location_t loc = EXPR_LOCATION (exp);
@@ -1375,6 +1443,48 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
}
}
+ /* Initialize unpromoted_int_parm_rtx if integer function arguments
+ of an external function are promoted to int. NB: This optimization
+ only applies to calling external functions since for
+
+ static void foo (char c) { ... }
+ void bar (char c) { foo (c); }
+
+ the existing combine optimization assumes that all callers of the
+ local function, foo, will promote the outgoing char argument to int.
+ Since we don't control all callers of the global function, bar, we
+ must extend bar's outgoing char argument to int when calling foo.
+ */
+ cgraph_node *local_info_node;
+ if (type_arg_types
+ && num_actuals
+ && fndecl
+ && (!(local_info_node = cgraph_node::local_info_node (fndecl))
+ || !local_info_node->local)
+ && targetm.calls.promote_prototypes (TREE_TYPE (fndecl)))
+ {
+ int j = num_actuals - 1;
+ if (struct_value_addr_value
+ && args[j].tree_value == struct_value_addr_value)
+ j--;
+ for (tree chain = type_arg_types;
+ chain;
+ chain = TREE_CHAIN (chain))
+ {
+ tree argtype = TREE_VALUE (chain);
+ if (argtype == void_type_node)
+ break;
+ if (args[j].tree_value
+ && TREE_CODE (args[j].tree_value) != ERROR_MARK)
+ args[j].unpromoted_int_parm_rtx
+ = get_unpromoted_int_parm_rtx (argtype,
+ args[j].tree_value);
+ if (j == 0)
+ break;
+ j--;
+ }
+ }
+
/* I counts args in order (to be) pushed; ARGPOS counts in order written. */
for (argpos = 0; argpos < num_actuals; i--, argpos++)
{
@@ -1546,6 +1656,31 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
else
args[i].tail_call_reg = args[i].reg;
+ if (args[i].unpromoted_int_parm_rtx)
+ {
+ /* If unpromoted_int_parm_rtx is available and reg and
+ tail_call_reg are each either unset or a hard register,
+ switch mode, reg and tail_call_reg to the mode of
+ unpromoted_int_parm_rtx. Otherwise, reset
+ unpromoted_int_parm_rtx to nullptr. */
+ if ((!args[i].reg
+ || HARD_REGISTER_P (args[i].reg))
+ && (!args[i].tail_call_reg
+ || HARD_REGISTER_P (args[i].tail_call_reg)))
+ {
+ /* Use mode of unpromoted_int_parm_rtx. */
+ args[i].mode = GET_MODE (args[i].unpromoted_int_parm_rtx);
+ if (args[i].reg)
+ args[i].reg
+ = gen_rtx_REG (args[i].mode, REGNO (args[i].reg));
+ if (args[i].tail_call_reg)
+ args[i].tail_call_reg
+ = gen_rtx_REG (args[i].mode,
+ REGNO (args[i].tail_call_reg));
+ }
+ else
+ args[i].unpromoted_int_parm_rtx = nullptr;
+ }
+
if (args[i].reg)
args[i].partial = targetm.calls.arg_partial_bytes (args_so_far, arg);
@@ -3031,7 +3166,8 @@ expand_call (tree exp, rtx target, int ignore)
args_so_far, reg_parm_stack_space,
&old_stack_level, &old_pending_adj,
&must_preallocate, &flags,
- &try_tail_call, CALL_FROM_THUNK_P (exp));
+ &try_tail_call, CALL_FROM_THUNK_P (exp),
+ type_arg_types);
if (args_size.var)
must_preallocate = true;
@@ -5075,11 +5211,14 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
if (arg->pass_on_stack)
stack_arg_under_construction++;
- arg->value = expand_expr (pval,
- (partial
- || TYPE_MODE (TREE_TYPE (pval)) != arg->mode)
- ? NULL_RTX : arg->stack,
- VOIDmode, EXPAND_STACK_PARM);
+ if (arg->unpromoted_int_parm_rtx)
+ arg->value = arg->unpromoted_int_parm_rtx;
+ else
+ arg->value = expand_expr (pval,
+ (partial
+ || TYPE_MODE (TREE_TYPE (pval)) != arg->mode)
+ ? NULL_RTX : arg->stack,
+ VOIDmode, EXPAND_STACK_PARM);
/* If we are promoting object (or for any other reason) the mode
doesn't agree, convert the mode. */
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86* .cfi_startproc
+x86* jmp baz
+x86* .cfi_endproc
+x86*...
+*/
+
+/* A char parameter forwarded unchanged to an external function taking
+   char: the expected body above is a bare tail call, with no sign
+   extension of the argument before the jump.  */
+extern int baz (char);
+
+int
+foo (char c1)
+{
+ return baz (c1);
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "ia32*" "" "" { target { { *-*-linux* *-*-gnu* } && ia32 } } {^\t?\.} } } */
+
+/*
+ia32*foo:
+ia32*.LFB0:
+ia32* .cfi_startproc
+ia32* movsbl 4\(%esp\), %eax
+ia32* movl %eax, 4\(%esp\)
+ia32* jmp baz
+ia32* .cfi_endproc
+ia32*...
+*/
+
+/* A char parameter passed to an external function taking short: the
+   parameter types differ, so the expected ia32 body above still sign
+   extends the argument and stores it back before the tail call.  */
+extern int baz (short);
+
+int
+foo (char c1)
+{
+ return baz (c1);
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Two char parameters forwarded to an external function taking two
+   chars.  The "+ 1" keeps the call out of tail position; the output
+   must not contain any movsbl.  */
+extern int baz (char, char);
+
+int
+foo (char c1, char c2)
+{
+ return baz (c1, c2) + 1;
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Two char parameters forwarded to an external function that returns a
+   20-byte struct by value.  The output must not contain any movsbl.  */
+struct s
+{
+ char c[20];
+};
+
+extern struct s baz (char, char);
+
+struct s
+foo (char c1, char c2)
+{
+ return baz (c1, c2);
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Two char parameters forwarded as the named arguments of an external
+   variadic function, followed by constant variadic arguments.  The
+   output must not contain any movsbl.  */
+extern int baz (char, char, ...);
+
+int
+foo (char c1, char c2)
+{
+ return baz (c1, c2, 0, 0, 0, 1);
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Two char parameters forwarded as the named arguments of an external
+   variadic function that returns a 20-byte struct by value.  The output
+   must not contain any movsbl.  */
+struct s
+{
+ char c[20];
+};
+
+extern struct s baz (char, char, ...);
+
+struct s
+foo (char c1, char c2)
+{
+ return baz (c1, c2, 0, 1);
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x64*" "" "" { target { { *-*-linux* *-*-gnu* } && { ! ia32 } } } {^\t?\.} } } */
+
+/*
+x64*foo:
+x64*.LFB1:
+x64* .cfi_startproc
+x64* movsbl %dil, %edi
+x64* jmp baz
+x64* .cfi_endproc
+x64*...
+*/
+
+/* The callee is a local (static, noinline) function rather than an
+   external one, so the expected non-ia32 body above still sign extends
+   the char argument before the tail call.  */
+ __attribute__ ((noinline))
+static int
+baz (char c1)
+{
+ return c1;
+}
+
+int
+foo (char c1)
+{
+ return baz (c1);
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86* .cfi_startproc
+x86* jmp baz
+x86* .cfi_endproc
+x86*...
+*/
+
+/* Six int parameters followed by two char parameters, all forwarded in
+   order to an external function with the same parameter list: the
+   expected body above is a bare tail call.  */
+extern int baz (int, int, int, int, int, int, char, char);
+
+int
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char c1, char c2)
+{
+ return baz (a1, a2, a3, a4, a5, a6, c1, c2);
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*c1:
+x86*.LFB0:
+x86* .cfi_startproc
+x86* jmp c2
+x86* .cfi_endproc
+x86*...
+*/
+
+/* char parameter and char return value, forwarded to an external
+   function with the same signature: the expected body above is a bare
+   tail call.  */
+extern char c2 (char);
+
+char
+c1 (char c)
+{
+ return c2 (c);
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86* .cfi_startproc
+x86* jmp baz
+x86* .cfi_endproc
+x86*...
+*/
+
+/* A short parameter forwarded unchanged to an external function taking
+   short: the expected body above is a bare tail call.  */
+extern int baz (short);
+
+int
+foo (short c1)
+{
+ return baz (c1);
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86* .cfi_startproc
+x86* jmp baz
+x86* .cfi_endproc
+x86*...
+*/
+
+/* Six int parameters followed by two short parameters, all forwarded in
+   order to an external function with the same parameter list: the
+   expected body above is a bare tail call.  */
+extern int baz (int, int, int, int, int, int, short, short);
+
+int
+foo (int a1, int a2, int a3, int a4, int a5, int a6, short c1, short c2)
+{
+ return baz (a1, a2, a3, a4, a5, a6, c1, c2);
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*c1:
+x86*.LFB0:
+x86* .cfi_startproc
+x86* jmp c2
+x86* .cfi_endproc
+x86*...
+*/
+
+/* short parameter and short return value, forwarded to an external
+   function with the same signature: the expected body above is a bare
+   tail call.  */
+extern short c2 (short);
+
+short
+c1 (short c)
+{
+ return c2 (c);
+}
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x64*" "" "" { target { { *-*-linux* *-*-gnu* } && { ! ia32 } } } {^\t?\.} } } */
+
+/*
+x64*foo:
+x64*.LFB0:
+x64* .cfi_startproc
+x64* movsbl %dil, %edi
+x64* jmp baz
+x64* .cfi_endproc
+x64*...
+*/
+
+/* A char parameter passed to an external function taking int: the
+   expected non-ia32 body above still sign extends the argument before
+   the tail call.  */
+extern int baz (int);
+
+int
+foo (char c1)
+{
+ return baz (c1);
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "ia32*" "" "" { target { { *-*-linux* *-*-gnu* } && ia32 } } {^\t?\.} } } */
+
+/*
+ia32*foo:
+ia32*.LFB0:
+ia32* .cfi_startproc
+ia32* movsbl 4\(%esp\), %eax
+ia32* movl %eax, 4\(%esp\)
+ia32* jmp baz
+ia32* .cfi_endproc
+ia32*...
+*/
+
+/* A char parameter passed to an external function taking int: the
+   expected ia32 body above still sign extends the argument and stores
+   it back before the tail call.  */
+extern int baz (int);
+
+int
+foo (char c1)
+{
+ return baz (c1);
+}
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "x64*" "" "" { target { { *-*-linux* *-*-gnu* } && { ! ia32 } } } {^\t?\.} } } */
+
+/*
+x64*foo:
+x64*.LFB0:
+x64* .cfi_startproc
+x64* movsbl %dil, %edi
+x64* jmp baz
+x64* .cfi_endproc
+x64*...
+*/
+
+/* A char parameter passed to an external function taking short: the
+   parameter types differ, so the expected non-ia32 body above still
+   sign extends the argument before the tail call.  */
+extern int baz (short);
+
+int
+foo (char c1)
+{
+ return baz (c1);
+}
--
2.47.0