@@ -122,14 +122,23 @@ typedef enum ffi_abi {
defined in ffi.c, to determine the exact return type and its size. */
#define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2)
-#if defined(POWERPC64) || defined(POWERPC_AIX)
+/* Used by ELFv2 for homogenous structure returns. */
+#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_TYPE_LAST + 1)
+#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_TYPE_LAST + 2)
+#define FFI_V2_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 3)
+
+#if _CALL_ELF == 2
+# define FFI_TRAMPOLINE_SIZE 32
+#else
+# if defined(POWERPC64) || defined(POWERPC_AIX)
# if defined(POWERPC_DARWIN64)
# define FFI_TRAMPOLINE_SIZE 48
# else
# define FFI_TRAMPOLINE_SIZE 24
# endif
-#else /* POWERPC || POWERPC_AIX */
+# else /* POWERPC || POWERPC_AIX */
# define FFI_TRAMPOLINE_SIZE 40
+# endif
#endif
#ifndef LIBFFI_ASM
@@ -49,6 +49,7 @@ enum {
FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */
FLAG_ARG_NEEDS_COPY = 1 << (31- 7),
+ FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by ELFv2 */
#ifndef __NO_FPRS__
FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
#endif
@@ -383,6 +384,45 @@ enum {
};
enum { ASM_NEEDS_REGISTERS64 = 4 };
+#if _CALL_ELF == 2
+static unsigned int
+discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
+{
+ switch (t->type)
+ {
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ *elnum = 1;
+ return (int) t->type;
+
+ case FFI_TYPE_STRUCT:;
+ {
+ unsigned int base_elt = 0, total_elnum = 0;
+ ffi_type **el = t->elements;
+ while (*el)
+ {
+ unsigned int el_elt, el_elnum = 0;
+ el_elt = discover_homogeneous_aggregate (*el, &el_elnum);
+ if (el_elt == 0
+ || (base_elt && base_elt != el_elt))
+ return 0;
+ base_elt = el_elt;
+ total_elnum += el_elnum;
+ if (total_elnum > 8)
+ return 0;
+ el++;
+ }
+ *elnum = total_elnum;
+ return base_elt;
+ }
+
+ default:
+ return 0;
+ }
+}
+#endif
+
+
/* ffi_prep_args64 is called by the assembly routine once stack space
has been allocated for the function's arguments.
@@ -470,7 +510,11 @@ ffi_prep_args64 (extended_cif *ecif, uns
stacktop.c = (char *) stack + bytes;
gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
+#if _CALL_ELF == 2
+ rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
+#else
rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
+#endif
fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
fparg_count = 0;
next_arg.ul = gpr_base.ul;
@@ -492,6 +536,8 @@ ffi_prep_args64 (extended_cif *ecif, uns
i < nargs;
i++, ptr++, p_argv.v++)
{
+ unsigned int elt, elnum;
+
switch ((*ptr)->type)
{
case FFI_TYPE_FLOAT:
@@ -549,28 +595,79 @@ ffi_prep_args64 (extended_cif *ecif, uns
if (align > 1)
next_arg.p = ALIGN (next_arg.p, align);
#endif
- words = ((*ptr)->size + 7) / 8;
- if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
- {
- size_t first = gpr_end.c - next_arg.c;
- memcpy (next_arg.c, *p_argv.c, first);
- memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
- next_arg.c = rest.c + words * 8 - first;
+ elt = 0;
+#if _CALL_ELF == 2
+ elt = discover_homogeneous_aggregate (*ptr, &elnum);
+#endif
+ if (elt)
+ {
+ union {
+ void *v;
+ float *f;
+ double *d;
+ } arg;
+
+ arg.v = *p_argv.v;
+ if (elt == FFI_TYPE_FLOAT)
+ {
+ do
+ {
+ double_tmp = *arg.f++;
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64
+ && i < nfixedargs)
+ *fpr_base.d++ = double_tmp;
+ else
+ *next_arg.f = (float) double_tmp;
+ if (++next_arg.f == gpr_end.f)
+ next_arg.f = rest.f;
+ fparg_count++;
+ }
+ while (--elnum != 0);
+ if ((next_arg.p & 3) != 0)
+ {
+ if (++next_arg.f == gpr_end.f)
+ next_arg.f = rest.f;
+ }
+ }
+ else
+ do
+ {
+ double_tmp = *arg.d++;
+ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
+ *fpr_base.d++ = double_tmp;
+ else
+ *next_arg.d = double_tmp;
+ if (++next_arg.d == gpr_end.d)
+ next_arg.d = rest.d;
+ fparg_count++;
+ }
+ while (--elnum != 0);
}
else
{
- char *where = next_arg.c;
+ words = ((*ptr)->size + 7) / 8;
+ if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
+ {
+ size_t first = gpr_end.c - next_arg.c;
+ memcpy (next_arg.c, *p_argv.c, first);
+ memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
+ next_arg.c = rest.c + words * 8 - first;
+ }
+ else
+ {
+ char *where = next_arg.c;
#ifndef __LITTLE_ENDIAN__
- /* Structures with size less than eight bytes are passed
- left-padded. */
- if ((*ptr)->size < 8)
- where += 8 - (*ptr)->size;
-#endif
- memcpy (where, *p_argv.c, (*ptr)->size);
- next_arg.ul += words;
- if (next_arg.ul == gpr_end.ul)
- next_arg.ul = rest.ul;
+ /* Structures with size less than eight bytes are passed
+ left-padded. */
+ if ((*ptr)->size < 8)
+ where += 8 - (*ptr)->size;
+#endif
+ memcpy (where, *p_argv.c, (*ptr)->size);
+ next_arg.ul += words;
+ if (next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+ }
}
break;
@@ -626,11 +723,10 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
unsigned type = cif->rtype->type;
unsigned size = cif->rtype->size;
+ /* The machine-independent calculation of cif->bytes doesn't work
+ for us. Redo the calculation. */
if (cif->abi != FFI_LINUX64)
{
- /* All the machine-independent calculation of cif->bytes will be wrong.
- Redo the calculation for SYSV. */
-
/* Space for the frame pointer, callee's LR, and the asm's temp regs. */
bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
@@ -640,13 +736,20 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
else
{
/* 64-bit ABI. */
+#if _CALL_ELF == 2
+ /* Space for backchain, CR, LR, TOC and the asm's temp regs. */
+ bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);
+ /* Space for the general registers. */
+ bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
+#else
/* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
regs. */
bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
/* Space for the mandatory parm save area and general registers. */
bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
+#endif
}
/* Return value handling. The rules for SYSV are as follows:
@@ -724,6 +827,25 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
flags |= FLAG_RETURNS_SMST;
break;
}
+#if _CALL_ELF == 2
+ if (cif->abi == FFI_LINUX64)
+ {
+ unsigned int elt, elnum;
+ elt = discover_homogeneous_aggregate (cif->rtype, &elnum);
+ if (elt)
+ {
+ if (elt == FFI_TYPE_DOUBLE)
+ flags |= FLAG_RETURNS_64BITS;
+ flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST;
+ break;
+ }
+ if (size <= 16)
+ {
+ flags |= FLAG_RETURNS_SMST;
+ break;
+ }
+ }
+#endif
intarg_count++;
flags |= FLAG_RETVAL_REFERENCE;
/* Fall through. */
@@ -839,6 +961,7 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
else
for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
{
+ unsigned int elt, elnum;
#ifdef __STRUCT_PARM_ALIGN__
unsigned int align;
#endif
@@ -849,12 +972,16 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
case FFI_TYPE_LONGDOUBLE:
fparg_count += 2;
intarg_count += 2;
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
break;
#endif
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
fparg_count++;
intarg_count++;
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
break;
case FFI_TYPE_STRUCT:
@@ -867,6 +994,21 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
intarg_count = ALIGN (intarg_count, align);
#endif
intarg_count += ((*ptr)->size + 7) / 8;
+ elt = 0;
+#if _CALL_ELF == 2
+ elt = discover_homogeneous_aggregate (*ptr, &elnum);
+#endif
+ if (elt)
+ {
+ fparg_count += elnum;
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
+ }
+ else
+ {
+ if (intarg_count > NUM_GPR_ARG_REGISTERS)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
+ }
break;
case FFI_TYPE_POINTER:
@@ -882,9 +1024,11 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
/* Everything else is passed as a 8-byte word in a GPR, either
the object itself or a pointer to it. */
intarg_count++;
+ if (intarg_count > NUM_GPR_ARG_REGISTERS)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
break;
default:
- FFI_ASSERT (0);
+ FFI_ASSERT (0);
}
}
@@ -922,8 +1066,13 @@ ffi_prep_cif_machdep_core (ffi_cif *cif)
#endif
/* Stack space. */
+#if _CALL_ELF == 2
+ if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
+ bytes += intarg_count * sizeof (long);
+#else
if (intarg_count > NUM_GPR_ARG_REGISTERS64)
bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
+#endif
}
/* The stack space allocated needs to be a multiple of 16 bytes. */
@@ -951,6 +1100,10 @@ ffi_prep_cif_machdep_var (ffi_cif *cif,
unsigned int ntotalargs MAYBE_UNUSED)
{
cif->nfixedargs = nfixedargs;
+#if _CALL_ELF == 2
+ if (cif->abi == FFI_LINUX64)
+ cif->flags |= FLAG_ARG_NEEDS_PSAVE;
+#endif
return ffi_prep_cif_machdep_core (cif);
}
@@ -970,8 +1123,11 @@ ffi_call(ffi_cif *cif, void (*fn)(void),
*
* We bounce-buffer SYSV small struct return values so that sysv.S
* can write r3 and r4 to memory without worrying about struct size.
+ *
+ * For ELFv2 ABI, use a bounce buffer for homogeneous structs too,
+ * for similar reasons.
*/
- unsigned int smst_buffer[2];
+ unsigned long smst_buffer[8];
extended_cif ecif;
ecif.cif = cif;
@@ -1013,10 +1169,11 @@ ffi_call(ffi_cif *cif, void (*fn)(void),
#ifndef __LITTLE_ENDIAN__
/* The SYSV ABI returns a structure of up to 4 bytes in size
left-padded in r3. */
- if (rsize <= 4)
+ if (cif->abi == FFI_SYSV && rsize <= 4)
memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize);
/* The SYSV ABI returns a structure of up to 8 bytes in size
- left-padded in r3/r4. */
+ left-padded in r3/r4, and the ELFv2 ABI similarly returns a
+ structure of up to 8 bytes in size left-padded in r3. */
else if (rsize <= 8)
memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
else
@@ -1026,7 +1183,7 @@ ffi_call(ffi_cif *cif, void (*fn)(void),
}
-#ifndef POWERPC64
+#if !defined POWERPC64 || _CALL_ELF == 2
#define MIN_CACHE_LINE_SIZE 8
static void
@@ -1050,6 +1207,22 @@ ffi_prep_closure_loc (ffi_closure *closu
void *codeloc)
{
#ifdef POWERPC64
+# if _CALL_ELF == 2
+ unsigned int *tramp = (unsigned int *) &closure->tramp[0];
+
+ if (cif->abi != FFI_LINUX64)
+ return FFI_BAD_ABI;
+
+ tramp[0] = 0xe96c0018; /* 0: ld 11,2f-0b(12) */
+ tramp[1] = 0xe98c0010; /* ld 12,1f-0b(12) */
+ tramp[2] = 0x7d8903a6; /* mtctr 12 */
+ tramp[3] = 0x4e800420; /* bctr */
+ /* 1: .quad function_addr */
+ /* 2: .quad context */
+ *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
+ *(void **) &tramp[6] = codeloc;
+ flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
+# else
void **tramp = (void **) &closure->tramp[0];
if (cif->abi != FFI_LINUX64)
@@ -1057,6 +1230,7 @@ ffi_prep_closure_loc (ffi_closure *closu
/* Copy function address and TOC from ffi_closure_LINUX64. */
memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
tramp[2] = codeloc;
+# endif
#else
unsigned int *tramp;
@@ -1413,9 +1587,10 @@ ffi_closure_helper_LINUX64 (ffi_closure
cif = closure->cif;
avalue = alloca (cif->nargs * sizeof (void *));
- /* Copy the caller's structure return value address so that the closure
- returns the data directly to the caller. */
- if (cif->rtype->type == FFI_TYPE_STRUCT)
+ /* Copy the caller's structure return value address so that the
+ closure returns the data directly to the caller. */
+ if (cif->rtype->type == FFI_TYPE_STRUCT
+ && (cif->flags & FLAG_RETURNS_SMST) == 0)
{
rvalue = (void *) *pst;
pst++;
@@ -1429,6 +1604,8 @@ ffi_closure_helper_LINUX64 (ffi_closure
/* Grab the addresses of the arguments from the stack frame. */
while (i < avn)
{
+ unsigned int elt, elnum;
+
switch (arg_types[i]->type)
{
case FFI_TYPE_SINT8:
@@ -1470,14 +1647,75 @@ ffi_closure_helper_LINUX64 (ffi_closure
if (align > 1)
pst = (unsigned long *) ALIGN ((size_t) pst, align);
#endif
-#ifndef __LITTLE_ENDIAN__
- /* Structures with size less than eight bytes are passed
- left-padded. */
- if (arg_types[i]->size < 8)
- avalue[i] = (char *) pst + 8 - arg_types[i]->size;
+ elt = 0;
+#if _CALL_ELF == 2
+ elt = discover_homogeneous_aggregate (arg_types[i], &elnum);
+#endif
+ if (elt)
+ {
+ union {
+ void *v;
+ unsigned long *ul;
+ float *f;
+ double *d;
+ size_t p;
+ } to, from;
+
+ /* Repackage the aggregate from its parts. The
+ aggregate size is not greater than the space taken by
+ the registers so store back to the register/parameter
+ save arrays. */
+ if (pfr + elnum <= end_pfr)
+ to.v = pfr;
+ else
+ to.v = pst;
+
+ avalue[i] = to.v;
+ from.ul = pst;
+ if (elt == FFI_TYPE_FLOAT)
+ {
+ do
+ {
+ if (pfr < end_pfr && i < nfixedargs)
+ {
+ *to.f = (float) pfr->d;
+ pfr++;
+ }
+ else
+ *to.f = *from.f;
+ to.f++;
+ from.f++;
+ }
+ while (--elnum != 0);
+ }
+ else
+ {
+ do
+ {
+ if (pfr < end_pfr && i < nfixedargs)
+ {
+ *to.d = pfr->d;
+ pfr++;
+ }
+ else
+ *to.d = *from.d;
+ to.d++;
+ from.d++;
+ }
+ while (--elnum != 0);
+ }
+ }
else
+ {
+#ifndef __LITTLE_ENDIAN__
+ /* Structures with size less than eight bytes are passed
+ left-padded. */
+ if (arg_types[i]->size < 8)
+ avalue[i] = (char *) pst + 8 - arg_types[i]->size;
+ else
#endif
- avalue[i] = pst;
+ avalue[i] = pst;
+ }
pst += (arg_types[i]->size + 7) / 8;
break;
@@ -1548,5 +1786,14 @@ ffi_closure_helper_LINUX64 (ffi_closure
(closure->fun) (cif, rvalue, avalue, closure->user_data);
/* Tell ffi_closure_LINUX64 how to perform return type promotions. */
+ if ((cif->flags & FLAG_RETURNS_SMST) != 0)
+ {
+ if ((cif->flags & FLAG_RETURNS_FP) == 0)
+ return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
+ else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
+ return FFI_V2_TYPE_DOUBLE_HOMOG;
+ else
+ return FFI_V2_TYPE_FLOAT_HOMOG;
+ }
return cif->rtype->type;
}
@@ -32,15 +32,22 @@
#ifdef __powerpc64__
.hidden ffi_call_LINUX64
.globl ffi_call_LINUX64
+# if _CALL_ELF == 2
+ .text
+ffi_call_LINUX64:
+ addis %r2, %r12, .TOC.-ffi_call_LINUX64@ha
+ addi %r2, %r2, .TOC.-ffi_call_LINUX64@l
+ .localentry ffi_call_LINUX64, . - ffi_call_LINUX64
+# else
.section ".opd","aw"
.align 3
ffi_call_LINUX64:
-#ifdef _CALL_LINUX
+# ifdef _CALL_LINUX
.quad .L.ffi_call_LINUX64,.TOC.@tocbase,0
.type ffi_call_LINUX64,@function
.text
.L.ffi_call_LINUX64:
-#else
+# else
.hidden .ffi_call_LINUX64
.globl .ffi_call_LINUX64
.quad .ffi_call_LINUX64,.TOC.@tocbase,0
@@ -48,7 +55,8 @@ ffi_call_LINUX64:
.type .ffi_call_LINUX64,@function
.text
.ffi_call_LINUX64:
-#endif
+# endif
+# endif
.LFB1:
mflr %r0
std %r28, -32(%r1)
@@ -63,26 +71,35 @@ ffi_call_LINUX64:
mr %r31, %r5 /* flags, */
mr %r30, %r6 /* rvalue, */
mr %r29, %r7 /* function address. */
+/* Save toc pointer, not for the ffi_prep_args64 call, but for the later
+ bctrl function call. */
+# if _CALL_ELF == 2
+ std %r2, 24(%r1)
+# else
std %r2, 40(%r1)
+# endif
/* Call ffi_prep_args64. */
mr %r4, %r1
-#ifdef _CALL_LINUX
+# if defined _CALL_LINUX || _CALL_ELF == 2
bl ffi_prep_args64
-#else
+# else
bl .ffi_prep_args64
-#endif
+# endif
- ld %r0, 0(%r29)
+# if _CALL_ELF == 2
+ mr %r12, %r29
+# else
+ ld %r12, 0(%r29)
ld %r2, 8(%r29)
ld %r11, 16(%r29)
-
+# endif
/* Now do the call. */
/* Set up cr1 with bits 4-7 of the flags. */
mtcrf 0x40, %r31
/* Get the address to call into CTR. */
- mtctr %r0
+ mtctr %r12
/* Load all those argument registers. */
ld %r3, -32-(8*8)(%r28)
ld %r4, -32-(7*8)(%r28)
@@ -117,12 +134,17 @@ ffi_call_LINUX64:
/* This must follow the call immediately, the unwinder
uses this to find out if r2 has been saved or not. */
+# if _CALL_ELF == 2
+ ld %r2, 24(%r1)
+# else
ld %r2, 40(%r1)
+# endif
/* Now, deal with the return value. */
mtcrf 0x01, %r31
- bt- 30, .Ldone_return_value
- bt- 29, .Lfp_return_value
+ bt 31, .Lstruct_return_value
+ bt 30, .Ldone_return_value
+ bt 29, .Lfp_return_value
std %r3, 0(%r30)
/* Fall through... */
@@ -147,14 +169,48 @@ ffi_call_LINUX64:
.Lfloat_return_value:
stfs %f1, 0(%r30)
b .Ldone_return_value
+
+.Lstruct_return_value:
+ bf 29, .Lsmall_struct
+ bf 28, .Lfloat_homog_return_value
+ stfd %f1, 0(%r30)
+ stfd %f2, 8(%r30)
+ stfd %f3, 16(%r30)
+ stfd %f4, 24(%r30)
+ stfd %f5, 32(%r30)
+ stfd %f6, 40(%r30)
+ stfd %f7, 48(%r30)
+ stfd %f8, 56(%r30)
+ b .Ldone_return_value
+
+.Lfloat_homog_return_value:
+ stfs %f1, 0(%r30)
+ stfs %f2, 4(%r30)
+ stfs %f3, 8(%r30)
+ stfs %f4, 12(%r30)
+ stfs %f5, 16(%r30)
+ stfs %f6, 20(%r30)
+ stfs %f7, 24(%r30)
+ stfs %f8, 28(%r30)
+ b .Ldone_return_value
+
+.Lsmall_struct:
+ std %r3, 0(%r30)
+ std %r4, 8(%r30)
+ b .Ldone_return_value
+
.LFE1:
.long 0
.byte 0,12,0,1,128,4,0,0
-#ifdef _CALL_LINUX
+# if _CALL_ELF == 2
+ .size ffi_call_LINUX64,.-ffi_call_LINUX64
+# else
+# ifdef _CALL_LINUX
.size ffi_call_LINUX64,.-.L.ffi_call_LINUX64
-#else
+# else
.size .ffi_call_LINUX64,.-.ffi_call_LINUX64
-#endif
+# endif
+# endif
.section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe1:
@@ -33,15 +33,22 @@
#ifdef __powerpc64__
FFI_HIDDEN (ffi_closure_LINUX64)
.globl ffi_closure_LINUX64
+# if _CALL_ELF == 2
+ .text
+ffi_closure_LINUX64:
+ addis %r2, %r12, .TOC.-ffi_closure_LINUX64@ha
+ addi %r2, %r2, .TOC.-ffi_closure_LINUX64@l
+ .localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64
+# else
.section ".opd","aw"
.align 3
ffi_closure_LINUX64:
-#ifdef _CALL_LINUX
+# ifdef _CALL_LINUX
.quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0
.type ffi_closure_LINUX64,@function
.text
.L.ffi_closure_LINUX64:
-#else
+# else
FFI_HIDDEN (.ffi_closure_LINUX64)
.globl .ffi_closure_LINUX64
.quad .ffi_closure_LINUX64,.TOC.@tocbase,0
@@ -49,15 +56,52 @@ ffi_closure_LINUX64:
.type .ffi_closure_LINUX64,@function
.text
.ffi_closure_LINUX64:
-#endif
+# endif
+# endif
+# if _CALL_ELF == 2
+# 32 byte special reg save area + 64 byte parm save area and retval
+# + 13*8 fpr save area + round to 16
+# define STACKFRAME 208
+# define PARMSAVE 32
+# No parameter save area is needed for the call to ffi_closure_helper_LINUX64,
+# so return value can start there.
+# define RETVAL PARMSAVE
+# else
# 48 bytes special reg save area + 64 bytes parm save area
# + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
# define STACKFRAME 240
# define PARMSAVE 48
# define RETVAL PARMSAVE+64
+# endif
.LFB1:
+# if _CALL_ELF == 2
+ ld %r12, FFI_TRAMPOLINE_SIZE(%r11) # closure->cif
+ mflr %r0
+ lwz %r12, 28(%r12) # cif->flags
+ mtcrf 0x40, %r12
+ addi %r12, %r1, PARMSAVE
+ bt 7, .Lparmsave
+ # Our caller has not allocated a parameter save area.
+ # We need to allocate one here and use it to pass gprs to
+ # ffi_closure_helper_LINUX64. The return value area will do.
+ addi %r12, %r1, -STACKFRAME+RETVAL
+.Lparmsave:
+ std %r0, 16(%r1)
+ # Save general regs into parm save area
+ std %r3, 0(%r12)
+ std %r4, 8(%r12)
+ std %r5, 16(%r12)
+ std %r6, 24(%r12)
+ std %r7, 32(%r12)
+ std %r8, 40(%r12)
+ std %r9, 48(%r12)
+ std %r10, 56(%r12)
+
+ # load up the pointer to the parm save area
+ mr %r5, %r12
+# else
mflr %r0
# Save general regs into parm save area
# This is the parameter save area set up by our caller.
@@ -74,6 +118,7 @@ ffi_closure_LINUX64:
# load up the pointer to the parm save area
addi %r5, %r1, PARMSAVE
+# endif
# next save fpr 1 to fpr 13
stfd %f1, -104+(0*8)(%r1)
@@ -103,11 +148,11 @@ ffi_closure_LINUX64:
mr %r3, %r11
# make the call
-#ifdef _CALL_LINUX
+# if defined _CALL_LINUX || _CALL_ELF == 2
bl ffi_closure_helper_LINUX64
-#else
+# else
bl .ffi_closure_helper_LINUX64
-#endif
+# endif
.Lret:
# now r3 contains the return type
@@ -116,10 +161,12 @@ ffi_closure_LINUX64:
# look up the proper starting point in table
# by using return type as offset
+ ld %r0, STACKFRAME+16(%r1)
+ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT
+ bge .Lsmall
mflr %r4 # move address of .Lret to r4
sldi %r3, %r3, 4 # now multiply return type by 16
addi %r4, %r4, .Lret_type0 - .Lret
- ld %r0, STACKFRAME+16(%r1)
add %r3, %r3, %r4 # add contents of table to table address
mtctr %r3
bctr # jump to it
@@ -234,15 +281,73 @@ ffi_closure_LINUX64:
mtlr %r0
addi %r1, %r1, STACKFRAME
blr
-# esac
+# case FFI_V2_TYPE_FLOAT_HOMOG
+ lfs %f1, RETVAL+0(%r1)
+ lfs %f2, RETVAL+4(%r1)
+ lfs %f3, RETVAL+8(%r1)
+ b .Lmorefloat
+# case FFI_V2_TYPE_DOUBLE_HOMOG
+ lfd %f1, RETVAL+0(%r1)
+ lfd %f2, RETVAL+8(%r1)
+ lfd %f3, RETVAL+16(%r1)
+ lfd %f4, RETVAL+24(%r1)
+ mtlr %r0
+ lfd %f5, RETVAL+32(%r1)
+ lfd %f6, RETVAL+40(%r1)
+ lfd %f7, RETVAL+48(%r1)
+ lfd %f8, RETVAL+56(%r1)
+ addi %r1, %r1, STACKFRAME
+ blr
+.Lmorefloat:
+ lfs %f4, RETVAL+12(%r1)
+ mtlr %r0
+ lfs %f5, RETVAL+16(%r1)
+ lfs %f6, RETVAL+20(%r1)
+ lfs %f7, RETVAL+24(%r1)
+ lfs %f8, RETVAL+28(%r1)
+ addi %r1, %r1, STACKFRAME
+ blr
+.Lsmall:
+# ifdef __LITTLE_ENDIAN__
+ ld %r3,RETVAL+0(%r1)
+ mtlr %r0
+ ld %r4,RETVAL+8(%r1)
+ addi %r1, %r1, STACKFRAME
+ blr
+# else
+ # A struct smaller than a dword is returned in the low bits of r3
+ # ie. right justified. Larger structs are passed left justified
+ # in r3 and r4. The return value area on the stack will have
+ # the structs as they are usually stored in memory.
+ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes?
+ neg %r5, %r3
+ ld %r3,RETVAL+0(%r1)
+ blt .Lsmalldown
+ mtlr %r0
+ ld %r4,RETVAL+8(%r1)
+ addi %r1, %r1, STACKFRAME
+ blr
+.Lsmalldown:
+ addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7
+ mtlr %r0
+ sldi %r5, %r5, 3
+ addi %r1, %r1, STACKFRAME
+ srd %r3, %r3, %r5
+ blr
+# endif
+
.LFE1:
.long 0
.byte 0,12,0,1,128,0,0,0
-#ifdef _CALL_LINUX
+# if _CALL_ELF == 2
+ .size ffi_closure_LINUX64,.-ffi_closure_LINUX64
+# else
+# ifdef _CALL_LINUX
.size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64
-#else
+# else
.size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64
-#endif
+# endif
+# endif
.section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe1: