[libffi,ARM] VFP hard-float calling convention support

Message ID	m362wm5gsf.fsf@redhat.com
State	New
Headers	show Return-Path: <gcc-patches-return-276614-incoming=patchwork.ozlabs.org@gcc.gnu.org> To: gcc-patches@gcc.gnu.org Subject: [Patch, libffi, ARM] VFP hard-float calling convention support User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/23.2 (gnu/linux) From: Anthony Green <green@redhat.com> Date: Thu, 28 Oct 2010 14:11:12 -0400 Message-ID: <m362wm5gsf.fsf@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

Index: src/arm/ffitarget.h =================================================================== --- src/arm/ffitarget.h (revision 166031) +++ src/arm/ffitarget.h (working copy) @@ -1,5 +1,7 @@ /* -----------------------------------------------------------------*-C-*- ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Copyright (c) 2010 CodeSourcery + Target configuration macros for ARM. Permission is hereby granted, free of charge, to any person obtaining @@ -34,11 +36,25 @@ typedef enum ffi_abi { FFI_FIRST_ABI = 0, FFI_SYSV, + FFI_VFP, + FFI_LAST_ABI, +#ifdef __ARM_PCS_VFP + FFI_DEFAULT_ABI = FFI_VFP, +#else FFI_DEFAULT_ABI = FFI_SYSV, - FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +#endif } ffi_abi; #endif +#define FFI_EXTRA_CIF_FIELDS \ + int vfp_used; \ + short vfp_reg_free, vfp_nargs; \ + signed char vfp_args[16] \ + +/* Internally used. */ +#define FFI_TYPE_STRUCT_VFP_FLOAT (FFI_TYPE_LAST + 1) +#define FFI_TYPE_STRUCT_VFP_DOUBLE (FFI_TYPE_LAST + 2) + /* ---- Definitions for closures ----------------------------------------- */ #define FFI_CLOSURES 1 Index: src/arm/ffi.c =================================================================== --- src/arm/ffi.c (revision 166031) +++ src/arm/ffi.c (working copy) @@ -29,12 +29,20 @@ #include <stdlib.h> +/* Forward declares. */ +static int vfp_type_p (ffi_type *); +static void layout_vfp_args (ffi_cif *); + /* ffi_prep_args is called by the assembly routine once stack space - has been allocated for the function's arguments */ - -void ffi_prep_args(char *stack, extended_cif *ecif) + has been allocated for the function's arguments + + The vfp_space parameter is the load area for VFP regs, the return + value is cif->vfp_used (word bitset of VFP regs used for passing + arguments). These are only used for the VFP hard-float ABI. +*/ +int ffi_prep_args(char *stack, extended_cif *ecif, float *vfp_space) { - register unsigned int i; + register unsigned int i, vi = 0; register void **p_argv; register char *argp; register ffi_type **p_arg; @@ -54,6 +62,21 @@ { size_t z; + /* Allocated in VFP registers. */ + if (ecif->cif->abi == FFI_VFP + && vi < ecif->cif->vfp_nargs && vfp_type_p (*p_arg)) + { + float* vfp_slot = vfp_space + ecif->cif->vfp_args[vi++]; + if ((*p_arg)->type == FFI_TYPE_FLOAT) + *((float*)vfp_slot) = *((float*)*p_argv); + else if ((*p_arg)->type == FFI_TYPE_DOUBLE) + *((double*)vfp_slot) = *((double*)*p_argv); + else + memcpy(vfp_slot, *p_argv, (*p_arg)->size); + p_argv++; + continue; + } + /* Align if necessary */ if (((*p_arg)->alignment - 1) & (unsigned) argp) { argp = (char *) ALIGN(argp, (*p_arg)->alignment); @@ -103,13 +126,15 @@ p_argv++; argp += z; } - - return; + + /* Indicate the VFP registers used. */ + return ecif->cif->vfp_used; } /* Perform machine dependent cif processing */ ffi_status ffi_prep_cif_machdep(ffi_cif *cif) { + int type_code; /* Round the stack up to a multiple of 8 bytes. This isn't needed everywhere, but it is on some platforms, and it doesn't harm anything when it isn't needed. */ @@ -130,7 +155,14 @@ break; case FFI_TYPE_STRUCT: - if (cif->rtype->size <= 4) + if (cif->abi == FFI_VFP + && (type_code = vfp_type_p (cif->rtype)) != 0) + { + /* A Composite Type passed in VFP registers, either + FFI_TYPE_STRUCT_VFP_FLOAT or FFI_TYPE_STRUCT_VFP_DOUBLE. */ + cif->flags = (unsigned) type_code; + } + else if (cif->rtype->size <= 4) /* A Composite Type not larger than 4 bytes is returned in r0. */ cif->flags = (unsigned)FFI_TYPE_INT; else @@ -145,11 +177,18 @@ break; } + /* Map out the register placements of VFP register args. + The VFP hard-float calling conventions are slightly more sophisticated than + the base calling conventions, so we do it here instead of in ffi_prep_args(). */ + if (cif->abi == FFI_VFP) + layout_vfp_args (cif); + return FFI_OK; } -extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, - unsigned, unsigned, unsigned *, void (*fn)(void)); +/* Prototypes for assembly functions, in sysv.S */ +extern void ffi_call_SYSV (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *); +extern void ffi_call_VFP (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *); void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) { @@ -157,6 +196,8 @@ int small_struct = (cif->flags == FFI_TYPE_INT && cif->rtype->type == FFI_TYPE_STRUCT); + int vfp_struct = (cif->flags == FFI_TYPE_STRUCT_VFP_FLOAT + || cif->flags == FFI_TYPE_STRUCT_VFP_DOUBLE); ecif.cif = cif; ecif.avalue = avalue; @@ -173,38 +214,51 @@ } else if (small_struct) ecif.rvalue = &temp; + else if (vfp_struct) + { + /* Largest case is double x 4. */ + ecif.rvalue = alloca(32); + } else ecif.rvalue = rvalue; switch (cif->abi) { case FFI_SYSV: - ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue, - fn); + ffi_call_SYSV (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); + break; + case FFI_VFP: + ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); break; + default: FFI_ASSERT(0); break; } if (small_struct) memcpy (rvalue, &temp, cif->rtype->size); + else if (vfp_struct) + memcpy (rvalue, ecif.rvalue, cif->rtype->size); } /** private members **/ static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, - void** args, ffi_cif* cif); + void** args, ffi_cif* cif, float *vfp_stack); void ffi_closure_SYSV (ffi_closure *); +void ffi_closure_VFP (ffi_closure *); + /* This function is jumped to by the trampoline */ unsigned int -ffi_closure_SYSV_inner (closure, respp, args) +ffi_closure_SYSV_inner (closure, respp, args, vfp_args) ffi_closure *closure; void **respp; void *args; + void *vfp_args; { // our various things... ffi_cif *cif; @@ -219,7 +273,7 @@ * a structure, it will re-set RESP to point to the * structure return address. */ - ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif, vfp_args); (closure->fun) (cif, *respp, arg_area, closure->user_data); @@ -229,10 +283,12 @@ /*@-exportheader@*/ static void ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, - void **avalue, ffi_cif *cif) + void **avalue, ffi_cif *cif, + /* Used only under VFP hard-float ABI. */ + float *vfp_stack) /*@=exportheader@*/ { - register unsigned int i; + register unsigned int i, vi = 0; register void **p_argv; register char *argp; register ffi_type **p_arg; @@ -249,8 +305,16 @@ for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) { size_t z; + size_t alignment; + + if (cif->abi == FFI_VFP + && vi < cif->vfp_nargs && vfp_type_p (*p_arg)) + { + *p_argv++ = (void*)(vfp_stack + cif->vfp_args[vi++]); + continue; + } - size_t alignment = (*p_arg)->alignment; + alignment = (*p_arg)->alignment; if (alignment < 4) alignment = 4; /* Align if necessary */ @@ -295,10 +359,17 @@ void *user_data, void *codeloc) { - FFI_ASSERT (cif->abi == FFI_SYSV); + void (*closure_func)(ffi_closure*) = NULL; + if (cif->abi == FFI_SYSV) + closure_func = &ffi_closure_SYSV; + else if (cif->abi == FFI_VFP) + closure_func = &ffi_closure_VFP; + else + FFI_ASSERT (0); + FFI_INIT_TRAMPOLINE (&closure->tramp[0], \ - &ffi_closure_SYSV, \ + closure_func, \ codeloc); closure->cif = cif; @@ -307,3 +378,123 @@ return FFI_OK; } + +/* Below are routines for VFP hard-float support. */ + +static int rec_vfp_type_p (ffi_type *t, int *elt, int *elnum) +{ + switch (t->type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + *elt = (int) t->type; + *elnum = 1; + return 1; + + case FFI_TYPE_STRUCT_VFP_FLOAT: + *elt = FFI_TYPE_FLOAT; + *elnum = t->size / sizeof (float); + return 1; + + case FFI_TYPE_STRUCT_VFP_DOUBLE: + *elt = FFI_TYPE_DOUBLE; + *elnum = t->size / sizeof (double); + return 1; + + case FFI_TYPE_STRUCT:; + { + int base_elt = 0, total_elnum = 0; + ffi_type **el = t->elements; + while (*el) + { + int el_elt = 0, el_elnum = 0; + if (! rec_vfp_type_p (*el, &el_elt, &el_elnum) + || (base_elt && base_elt != el_elt) + || total_elnum + el_elnum > 4) + return 0; + base_elt = el_elt; + total_elnum += el_elnum; + el++; + } + *elnum = total_elnum; + *elt = base_elt; + return 1; + } + default: ; + } + return 0; +} + +static int vfp_type_p (ffi_type *t) +{ + int elt, elnum; + if (rec_vfp_type_p (t, &elt, &elnum)) + { + if (t->type == FFI_TYPE_STRUCT) + { + if (elnum == 1) + t->type = elt; + else + t->type = (elt == FFI_TYPE_FLOAT + ? FFI_TYPE_STRUCT_VFP_FLOAT + : FFI_TYPE_STRUCT_VFP_DOUBLE); + } + return (int) t->type; + } + return 0; +} + +static void place_vfp_arg (ffi_cif *cif, ffi_type *t) +{ + int reg = cif->vfp_reg_free; + int nregs = t->size / sizeof (float); + int align = ((t->type == FFI_TYPE_STRUCT_VFP_FLOAT + || t->type == FFI_TYPE_FLOAT) ? 1 : 2); + /* Align register number. */ + if ((reg & 1) && align == 2) + reg++; + while (reg + nregs <= 16) + { + int s, new_used = 0; + for (s = reg; s < reg + nregs; s++) + { + new_used |= (1 << s); + if (cif->vfp_used & (1 << s)) + { + reg += align; + goto next_reg; + } + } + /* Found regs to allocate. */ + cif->vfp_used |= new_used; + cif->vfp_args[cif->vfp_nargs++] = reg; + + /* Update vfp_reg_free. */ + if (cif->vfp_used & (1 << cif->vfp_reg_free)) + { + reg += nregs; + while (cif->vfp_used & (1 << reg)) + reg += 1; + cif->vfp_reg_free = reg; + } + return; + next_reg: ; + } +} + +static void layout_vfp_args (ffi_cif *cif) +{ + int i; + /* Init VFP fields */ + cif->vfp_used = 0; + cif->vfp_nargs = 0; + cif->vfp_reg_free = 0; + memset (cif->vfp_args, -1, 16); /* Init to -1. */ + + for (i = 0; i < cif->nargs; i++) + { + ffi_type *t = cif->arg_types[i]; + if (vfp_type_p (t)) + place_vfp_arg (cif, t); + } +} Index: src/arm/sysv.S =================================================================== --- src/arm/sysv.S (revision 166031) +++ src/arm/sysv.S (working copy) @@ -142,12 +142,11 @@ .endm - @ r0: ffi_prep_args + @ r0: fn @ r1: &ecif @ r2: cif->bytes @ r3: fig->flags @ sp+0: ecif.rvalue - @ sp+4: fn @ This assumes we are using gas. ARM_FUNC_START ffi_call_SYSV @@ -162,24 +161,23 @@ sub sp, fp, r2 @ Place all of the ffi_prep_args in position - mov ip, r0 mov r0, sp @ r1 already set @ Call ffi_prep_args(stack, &ecif) - call_reg(ip) + bl ffi_prep_args @ move first 4 parameters in registers ldmia sp, {r0-r3} @ and adjust stack - ldr ip, [fp, #8] - cmp ip, #16 - movhs ip, #16 - add sp, sp, ip + sub lr, fp, sp @ cif->bytes == fp - sp + ldr ip, [fp] @ load fn() in advance + cmp lr, #16 + movhs lr, #16 + add sp, sp, lr @ call (fn) (...) - ldr ip, [fp, #28] call_reg(ip) @ Remove the space we pushed for the args @@ -230,6 +228,101 @@ UNWIND .fnend .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) + + @ r0: fn + @ r1: &ecif + @ r2: cif->bytes + @ r3: fig->flags + @ sp+0: ecif.rvalue + +ARM_FUNC_START ffi_call_VFP + @ Save registers + stmfd sp!, {r0-r3, fp, lr} + UNWIND .save {r0-r3, fp, lr} + mov fp, sp + UNWIND .setfp fp, sp + + @ Make room for all of the new args. + sub sp, sp, r2 + + @ Make room for loading VFP args + sub sp, sp, #64 + + @ Place all of the ffi_prep_args in position + mov r0, sp + @ r1 already set + sub r2, fp, #64 @ VFP scratch space + + @ Call ffi_prep_args(stack, &ecif, vfp_space) + bl ffi_prep_args + + @ Load VFP register args if needed + cmp r0, #0 + beq LSYM(Lbase_args) + + @ Load only d0 if possible + cmp r0, #3 + sub ip, fp, #64 + flddle d0, [ip] + fldmiadgt ip, {d0-d7} + +LSYM(Lbase_args): + @ move first 4 parameters in registers + ldmia sp, {r0-r3} + + @ and adjust stack + sub lr, ip, sp @ cif->bytes == (fp - 64) - sp + ldr ip, [fp] @ load fn() in advance + cmp lr, #16 + movhs lr, #16 + add sp, sp, lr + + @ call (fn) (...) + call_reg(ip) + + @ Remove the space we pushed for the args + mov sp, fp + + @ Load r2 with the pointer to storage for + @ the return value + ldr r2, [sp, #24] + + @ Load r3 with the return type code + ldr r3, [sp, #12] + + @ If the return value pointer is NULL, + @ assume no return value. + cmp r2, #0 + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_INT + streq r0, [r2] + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_SINT64 + stmeqia r2, {r0, r1} + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_FLOAT + fstseq s0, [r2] + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_DOUBLE + fstdeq d0, [r2] + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_STRUCT_VFP_FLOAT + cmpne r3, #FFI_TYPE_STRUCT_VFP_DOUBLE + fstmiadeq r2, {d0-d3} + +LSYM(Lepilogue_vfp): + RETLDM "r0-r3,fp" + +.ffi_call_VFP_end: + UNWIND .fnend + .size CNAME(ffi_call_VFP),.ffi_call_VFP_end-CNAME(ffi_call_VFP) + + /* unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (closure, respp, args) @@ -302,6 +395,68 @@ UNWIND .fnend .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) + +ARM_FUNC_START ffi_closure_VFP + fstmfdd sp!, {d0-d7} + @ r0-r3, then d0-d7 + UNWIND .pad #80 + add ip, sp, #80 + stmfd sp!, {ip, lr} + UNWIND .save {r0, lr} + add r2, sp, #72 + add r3, sp, #8 + .pad #72 + sub sp, sp, #72 + str sp, [sp, #64] + add r1, sp, #64 + bl ffi_closure_SYSV_inner + + cmp r0, #FFI_TYPE_INT + beq .Lretint_vfp + + cmp r0, #FFI_TYPE_FLOAT + beq .Lretfloat_vfp + + cmp r0, #FFI_TYPE_DOUBLE + cmpne r0, #FFI_TYPE_LONGDOUBLE + beq .Lretdouble_vfp + + cmp r0, #FFI_TYPE_SINT64 + beq .Lretlonglong_vfp + + cmp r0, #FFI_TYPE_STRUCT_VFP_FLOAT + beq .Lretfloat_struct_vfp + + cmp r0, #FFI_TYPE_STRUCT_VFP_DOUBLE + beq .Lretdouble_struct_vfp + +.Lclosure_epilogue_vfp: + add sp, sp, #72 + ldmfd sp, {sp, pc} + +.Lretfloat_vfp: + flds s0, [sp] + b .Lclosure_epilogue_vfp +.Lretdouble_vfp: + fldd d0, [sp] + b .Lclosure_epilogue_vfp +.Lretint_vfp: + ldr r0, [sp] + b .Lclosure_epilogue_vfp +.Lretlonglong_vfp: + ldmia sp, {r0, r1} + b .Lclosure_epilogue_vfp +.Lretfloat_struct_vfp: + fldmiad sp, {d0-d1} + b .Lclosure_epilogue_vfp +.Lretdouble_struct_vfp: + fldmiad sp, {d0-d3} + b .Lclosure_epilogue_vfp + +.ffi_closure_VFP_end: + UNWIND .fnend + .size CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP) + #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",%progbits #endif Index: testsuite/libffi.call/cls_double_va.c =================================================================== --- testsuite/libffi.call/cls_double_va.c (revision 166031) +++ testsuite/libffi.call/cls_double_va.c (working copy) @@ -6,6 +6,8 @@ /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */ /* { dg-output "" { xfail avr32*-*-* } } */ +/* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */ + #include "ffitest.h" static void Index: testsuite/libffi.call/cls_longdouble_va.c =================================================================== --- testsuite/libffi.call/cls_longdouble_va.c (revision 166031) +++ testsuite/libffi.call/cls_longdouble_va.c (working copy) @@ -6,6 +6,8 @@ /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */ /* { dg-output "" { xfail avr32*-*-* x86_64-*-mingw* } } */ +/* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */ + #include "ffitest.h" static void Index: testsuite/lib/libffi-dg.exp =================================================================== --- testsuite/lib/libffi-dg.exp (revision 166031) +++ testsuite/lib/libffi-dg.exp (working copy) @@ -272,7 +272,57 @@ } } +proc check-flags { args } { + # The args are within another list; pull them out. + set args [lindex $args 0] + + # The next two arguments are optional. If they were not specified, + # use the defaults. + if { [llength $args] == 2 } { + lappend $args [list "*"] + } + if { [llength $args] == 3 } { + lappend $args [list ""] + } + + # If the option strings are the defaults, or the same as the + # defaults, there is no need to call check_conditional_xfail to + # compare them to the actual options. + if { [string compare [lindex $args 2] "*"] == 0 + && [string compare [lindex $args 3] "" ] == 0 } { + set result 1 + } else { + # The target list might be an effective-target keyword, so replace + # the original list with "*-*-*", since we already know it matches. + set result [check_conditional_xfail [lreplace $args 1 1 "*-*-*"]] + } + + return $result +} + +proc dg-skip-if { args } { + # Verify the number of arguments. The last two are optional. + set args [lreplace $args 0 0] + if { [llength $args] < 2 || [llength $args] > 4 } { + error "dg-skip-if 2: need 2, 3, or 4 arguments" + } + + # Don't bother if we're already skipping the test. + upvar dg-do-what dg-do-what + if { [lindex ${dg-do-what} 1] == "N" } { + return + } + + set selector [list target [lindex $args 1]] + if { [dg-process-target $selector] == "S" } { + if [check-flags $args] { + upvar dg-do-what dg-do-what + set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"] + } + } +} + # We need to make sure that additional_files and additional_sources # are both cleared out after every test. It is not enough to clear # them out *before* the next test run because gcc-target-compile gets

[libffi,ARM] VFP hard-float calling convention support

Commit Message

Comments

Patch