diff mbox

[libffi] Build with msvcc

Message ID m3pqxw1fg9.fsf@redhat.com
State New
Headers show

Commit Message

Anthony Green Aug. 6, 2010, 5:24 a.m. UTC
I'm checking in the following libffi patch.

This patch from Dan Witte adds build machinery for the MS compiler.  I'm
committing this in order to keep GCC's libffi in sync with upstream.

Thanks,

AG


2010-01-15  Anthony Green  <green@redhat.com>

	* README: Add notes on building with Microsoft Visual C++.

2010-01-15  Daniel Witte  <dwitte@mozilla.com>

	* msvcc.sh: New file.

	* src/x86/win32.S: Port assembly routines to MSVC and #ifdef.
	* src/x86/ffi.c: Tweak function declaration and remove excess
	parens.
	* include/ffi.h.in: Add __declspec(align(8)) to typedef struct
	ffi_closure.

	* src/x86/ffi.c: Merge ffi_call_SYSV and ffi_call_STDCALL into new
	function ffi_call_win32 on X86_WIN32.
	* src/x86/win32.S (ffi_call_SYSV): Rename to ffi_call_win32.
	(ffi_call_STDCALL): Remove.

	* src/prep_cif.c (ffi_prep_cif): Move stack space allocation code
	to ffi_prep_cif_machdep for x86.
	* src/x86/ffi.c (ffi_prep_cif_machdep): To here.
diff mbox

Patch

Index: libffi/include/ffi.h.in
===================================================================
--- libffi.orig/include/ffi.h.in
+++ libffi/include/ffi.h.in
@@ -251,6 +251,9 @@  size_t ffi_java_raw_size (ffi_cif *cif);
 
 #if FFI_CLOSURES
 
+#ifdef _MSC_VER
+__declspec(align(8))
+#endif
 typedef struct {
   char tramp[FFI_TRAMPOLINE_SIZE];
   ffi_cif   *cif;
Index: libffi/src/prep_cif.c
===================================================================
--- libffi.orig/src/prep_cif.c
+++ libffi/src/prep_cif.c
@@ -109,16 +109,13 @@  ffi_status ffi_prep_cif(ffi_cif *cif, ff
   /* Perform a sanity check on the return type */
   FFI_ASSERT_VALID_TYPE(cif->rtype);
 
-  /* x86-64 and s390 stack space allocation is handled in prep_machdep.  */
-#if !defined M68K && !defined __x86_64__ && !defined S390 && !defined PA
+  /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */
+#if !defined M68K && !defined __i386__ && !defined __x86_64__ && !defined S390 && !defined PA
   /* Make space for the return structure pointer */
   if (cif->rtype->type == FFI_TYPE_STRUCT
 #ifdef SPARC
       && (cif->abi != FFI_V9 || cif->rtype->size > 32)
 #endif
-#ifdef X86_DARWIN
-      && (cif->rtype->size > 8)
-#endif
      )
     bytes = STACK_ARG_SIZE(sizeof(void*));
 #endif
@@ -134,7 +131,7 @@  ffi_status ffi_prep_cif(ffi_cif *cif, ff
 	 check after the initialization.  */
       FFI_ASSERT_VALID_TYPE(*ptr);
 
-#if !defined __x86_64__ && !defined S390 && !defined PA
+#if !defined __i386__ && !defined __x86_64__ && !defined S390 && !defined PA
 #ifdef SPARC
       if (((*ptr)->type == FFI_TYPE_STRUCT
 	   && ((*ptr)->size > 16 || cif->abi != FFI_V9))
Index: libffi/src/x86/ffi.c
===================================================================
--- libffi.orig/src/x86/ffi.c
+++ libffi/src/x86/ffi.c
@@ -148,13 +148,13 @@  void ffi_prep_args(char *stack, extended
 /* Perform machine dependent cif processing */
 ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
 {
+  unsigned int i;
+  ffi_type **ptr;
+
   /* Set the return type flag */
   switch (cif->rtype->type)
     {
     case FFI_TYPE_VOID:
-#ifdef X86
-    case FFI_TYPE_STRUCT:
-#endif
 #if defined(X86) || defined (X86_WIN32) || defined(X86_FREEBSD) || defined(X86_DARWIN) || defined(X86_WIN64)
     case FFI_TYPE_UINT8:
     case FFI_TYPE_UINT16:
@@ -165,7 +165,6 @@  ffi_status ffi_prep_cif_machdep(ffi_cif 
     case FFI_TYPE_UINT32:
     case FFI_TYPE_SINT32:
 #endif
-
     case FFI_TYPE_SINT64:
     case FFI_TYPE_FLOAT:
     case FFI_TYPE_DOUBLE:
@@ -184,8 +183,8 @@  ffi_status ffi_prep_cif_machdep(ffi_cif 
       cif->flags = FFI_TYPE_SINT64;
       break;
 
-#ifndef X86
     case FFI_TYPE_STRUCT:
+#ifndef X86
       if (cif->rtype->size == 1)
         {
           cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
@@ -207,15 +206,13 @@  ffi_status ffi_prep_cif_machdep(ffi_cif 
           cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
         }
       else
+#endif
         {
           cif->flags = FFI_TYPE_STRUCT;
-#ifdef X86_WIN64
           // allocate space for return value pointer
           cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
-#endif
         }
       break;
-#endif
 
     default:
 #ifdef X86_WIN64
@@ -229,41 +226,36 @@  ffi_status ffi_prep_cif_machdep(ffi_cif 
       break;
     }
 
-#ifdef X86_DARWIN
-  cif->bytes = (cif->bytes + 15) & ~0xF;
-#endif
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      if (((*ptr)->alignment - 1) & cif->bytes)
+        cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
+      cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+    }
 
 #ifdef X86_WIN64
-  {
-    unsigned int i;
-    ffi_type **ptr;
-
-    for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
-      {
-        if (((*ptr)->alignment - 1) & cif->bytes)
-          cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
-        cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
-      }
-  }
   // ensure space for storing four registers
   cif->bytes += 4 * sizeof(ffi_arg);
 #endif
 
+#ifdef X86_DARWIN
+  cif->bytes = (cif->bytes + 15) & ~0xF;
+#endif
+
   return FFI_OK;
 }
 
-extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
-                          unsigned, unsigned, unsigned *, void (*fn)(void));
-
-#ifdef X86_WIN32
-extern void ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *,
-                          unsigned, unsigned, unsigned *, void (*fn)(void));
-
-#endif /* X86_WIN32 */
 #ifdef X86_WIN64
 extern int
 ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
                unsigned, unsigned, unsigned *, void (*fn)(void));
+#elif defined(X86_WIN32)
+extern void
+ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned *, void (*fn)(void));
+#else
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+                          unsigned, unsigned, unsigned *, void (*fn)(void));
 #endif
 
 void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
@@ -321,18 +313,18 @@  void ffi_call(ffi_cif *cif, void (*fn)(v
                        cif->flags, ecif.rvalue, fn);
       }
       break;
+#elif defined(X86_WIN32)
+    case FFI_SYSV:
+    case FFI_STDCALL:
+      ffi_call_win32(ffi_prep_args, &ecif, cif->bytes, cif->flags,
+                     ecif.rvalue, fn);
+      break;
 #else
     case FFI_SYSV:
       ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
                     fn);
       break;
-#ifdef X86_WIN32
-    case FFI_STDCALL:
-      ffi_call_STDCALL(ffi_prep_args, &ecif, cif->bytes, cif->flags,
-                       ecif.rvalue, fn);
-      break;
-#endif /* X86_WIN32 */
-#endif /* X86_WIN64 */
+#endif
     default:
       FFI_ASSERT(0);
       break;
@@ -342,6 +334,8 @@  void ffi_call(ffi_cif *cif, void (*fn)(v
 
 /** private members **/
 
+/* The following __attribute__((regparm(1))) decorations will have no effect
+   on MSVC - standard cdecl convention applies. */
 static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
                                          void** args, ffi_cif* cif);
 void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
@@ -390,11 +384,8 @@  ffi_closure_win64_inner (ffi_closure *cl
 }
 
 #else
-unsigned int FFI_HIDDEN
-ffi_closure_SYSV_inner (closure, respp, args)
-     ffi_closure *closure;
-     void **respp;
-     void *args;
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
 {
   /* our various things...  */
   ffi_cif       *cif;
@@ -505,7 +496,7 @@  ffi_prep_incoming_args_SYSV(char *stack,
 /* How to make a trampoline.  Derived from gcc/config/i386/i386.c. */
 
 #define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
-({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
    unsigned int  __fun = (unsigned int)(FUN); \
    unsigned int  __ctx = (unsigned int)(CTX); \
    unsigned int  __dis = __fun - (__ctx + 10);  \
@@ -513,10 +504,10 @@  ffi_prep_incoming_args_SYSV(char *stack,
    *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
    *(unsigned char *)  &__tramp[5] = 0xe9; \
    *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
- })
+ }
 
 #define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE)  \
-({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
    unsigned int  __fun = (unsigned int)(FUN); \
    unsigned int  __ctx = (unsigned int)(CTX); \
    unsigned int  __dis = __fun - (__ctx + 10); \
@@ -527,7 +518,7 @@  ffi_prep_incoming_args_SYSV(char *stack,
    *(unsigned int*)  &__tramp[6] = __dis; /* call __fun  */ \
    *(unsigned char *)  &__tramp[10] = 0xc2; \
    *(unsigned short*)  &__tramp[11] = __size; /* ret __size  */ \
- })
+ }
 
 /* the cif must already be prep'ed */
 
@@ -627,16 +618,6 @@  ffi_prep_args_raw(char *stack, extended_
  * libffi-1.20, this is not the case.)
  */
 
-extern void
-ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, unsigned, 
-              unsigned, unsigned *, void (*fn)(void));
-
-#ifdef X86_WIN32
-extern void
-ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *, unsigned,
-                 unsigned, unsigned *, void (*fn)(void));
-#endif /* X86_WIN32 */
-
 void
 ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
 {
@@ -660,16 +641,18 @@  ffi_raw_call(ffi_cif *cif, void (*fn)(vo
   
   switch (cif->abi) 
     {
+#ifdef X86_WIN32
+    case FFI_SYSV:
+    case FFI_STDCALL:
+      ffi_call_win32(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+                     ecif.rvalue, fn);
+      break;
+#else
     case FFI_SYSV:
       ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
                     ecif.rvalue, fn);
       break;
-#ifdef X86_WIN32
-    case FFI_STDCALL:
-      ffi_call_STDCALL(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
-                       ecif.rvalue, fn);
-      break;
-#endif /* X86_WIN32 */
+#endif
     default:
       FFI_ASSERT(0);
       break;
Index: libffi/src/x86/win32.S
===================================================================
--- libffi.orig/src/x86/win32.S
+++ libffi/src/x86/win32.S
@@ -2,6 +2,7 @@ 
    win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
 	     Copyright (c) 2001  John Beniton
 	     Copyright (c) 2002  Ranjit Mathew
+	     Copyright (c) 2009  Daniel Witte
 			
  
    X86 Foreign Function Interface
@@ -31,14 +32,371 @@ 
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
- 
+
+#ifdef _MSC_VER
+
+.386
+.MODEL FLAT, C
+
+EXTRN ffi_closure_SYSV_inner:NEAR
+
+_TEXT SEGMENT
+
+ffi_call_win32 PROC NEAR,
+    ffi_prep_args : NEAR PTR DWORD,
+    ecif          : NEAR PTR DWORD,
+    cif_bytes     : DWORD,
+    cif_flags     : DWORD,
+    rvalue        : NEAR PTR DWORD,
+    fn            : NEAR PTR DWORD
+
+        ;; Make room for all of the new args.
+        mov  ecx, cif_bytes
+        sub  esp, ecx
+
+        mov  eax, esp
+
+        ;; Place all of the ffi_prep_args in position
+        push ecif
+        push eax
+        call ffi_prep_args
+
+        ;; Return stack to previous state and call the function
+        add  esp, 8
+
+        call fn
+
+        ;; cdecl:   we restore esp in the epilogue, so there's no need to
+        ;;          remove the space we pushed for the args.
+        ;; stdcall: the callee has already cleaned the stack.
+
+        ;; Load ecx with the return type code
+        mov  ecx, cif_flags
+
+        ;; If the return value pointer is NULL, assume no return value.
+        cmp  rvalue, 0
+        jne  ca_jumptable
+
+        ;; Even if there is no space for the return value, we are
+        ;; obliged to handle floating-point values.
+        cmp  ecx, FFI_TYPE_FLOAT
+        jne  ca_epilogue
+        fstp st(0)
+
+        jmp  ca_epilogue
+
+ca_jumptable:
+        jmp  [ca_jumpdata + 4 * ecx]
+ca_jumpdata:
+        ;; Do not insert anything here between label and jump table.
+        dd offset ca_epilogue       ;; FFI_TYPE_VOID
+        dd offset ca_retint         ;; FFI_TYPE_INT
+        dd offset ca_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset ca_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset ca_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset ca_retint8        ;; FFI_TYPE_UINT8
+        dd offset ca_retint8        ;; FFI_TYPE_SINT8
+        dd offset ca_retint16       ;; FFI_TYPE_UINT16
+        dd offset ca_retint16       ;; FFI_TYPE_SINT16
+        dd offset ca_retint         ;; FFI_TYPE_UINT32
+        dd offset ca_retint         ;; FFI_TYPE_SINT32
+        dd offset ca_retint64       ;; FFI_TYPE_UINT64
+        dd offset ca_retint64       ;; FFI_TYPE_SINT64
+        dd offset ca_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset ca_retint         ;; FFI_TYPE_POINTER
+        dd offset ca_retint8        ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset ca_retint16       ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset ca_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+
+ca_retint8:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], al
+        jmp   ca_epilogue
+
+ca_retint16:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], ax
+        jmp   ca_epilogue
+
+ca_retint:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        jmp   ca_epilogue
+
+ca_retint64:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        mov   [ecx + 4], edx
+        jmp   ca_epilogue
+
+ca_retfloat:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  DWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  QWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retlongdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  TBYTE PTR [ecx]
+        jmp   ca_epilogue
+
+ca_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_call_win32 ENDP
+
+ffi_closure_SYSV PROC NEAR FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 8]
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_SYSV_inner
+        mov  ecx, [ebp - 12]
+
+cs_jumptable:
+        jmp  [cs_jumpdata + 4 * eax]
+cs_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cs_epilogue       ;; FFI_TYPE_VOID
+        dd offset cs_retint         ;; FFI_TYPE_INT
+        dd offset cs_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cs_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cs_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cs_retint8        ;; FFI_TYPE_UINT8
+        dd offset cs_retint8        ;; FFI_TYPE_SINT8
+        dd offset cs_retint16       ;; FFI_TYPE_UINT16
+        dd offset cs_retint16       ;; FFI_TYPE_SINT16
+        dd offset cs_retint         ;; FFI_TYPE_UINT32
+        dd offset cs_retint         ;; FFI_TYPE_SINT32
+        dd offset cs_retint64       ;; FFI_TYPE_UINT64
+        dd offset cs_retint64       ;; FFI_TYPE_SINT64
+        dd offset cs_retstruct      ;; FFI_TYPE_STRUCT
+        dd offset cs_retint         ;; FFI_TYPE_POINTER
+        dd offset cs_retint8        ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cs_retint16       ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cs_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cs_retint8:
+        mov   al, [ecx]
+        jmp   cs_epilogue
+
+cs_retint16:
+        mov   ax, [ecx]
+        jmp   cs_epilogue
+
+cs_retint:
+        mov   eax, [ecx]
+        jmp   cs_epilogue
+
+cs_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cs_epilogue
+
+cs_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retstruct:
+        ;; Caller expects us to pop struct return value pointer hidden arg.
+        ;; Epilogue code is autogenerated.
+        ret	4
+
+cs_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_SYSV ENDP
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+ffi_closure_raw_SYSV PROC NEAR USES esi
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        mov  esi, [eax + RAW_CLOSURE_CIF_OFFSET]        ;; closure->cif
+        mov  edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET]  ;; closure->user_data
+        mov  [esp + 12], edx                            ;; user_data
+        lea  edx, [ebp + 8]
+        mov  [esp + 8], edx                             ;; raw_args
+        lea  edx, [ebp - 24]
+        mov  [esp + 4], edx                             ;; &res
+        mov  [esp], esi                                 ;; cif
+        call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET]   ;; closure->fun
+        mov  eax, [esi + CIF_FLAGS_OFFSET]              ;; cif->flags
+        lea  ecx, [ebp - 24]
+
+cr_jumptable:
+        jmp  [cr_jumpdata + 4 * eax]
+cr_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cr_epilogue       ;; FFI_TYPE_VOID
+        dd offset cr_retint         ;; FFI_TYPE_INT
+        dd offset cr_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cr_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cr_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cr_retint8        ;; FFI_TYPE_UINT8
+        dd offset cr_retint8        ;; FFI_TYPE_SINT8
+        dd offset cr_retint16       ;; FFI_TYPE_UINT16
+        dd offset cr_retint16       ;; FFI_TYPE_SINT16
+        dd offset cr_retint         ;; FFI_TYPE_UINT32
+        dd offset cr_retint         ;; FFI_TYPE_SINT32
+        dd offset cr_retint64       ;; FFI_TYPE_UINT64
+        dd offset cr_retint64       ;; FFI_TYPE_SINT64
+        dd offset cr_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cr_retint         ;; FFI_TYPE_POINTER
+        dd offset cr_retint8        ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cr_retint16       ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cr_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cr_retint8:
+        mov   al, [ecx]
+        jmp   cr_epilogue
+
+cr_retint16:
+        mov   ax, [ecx]
+        jmp   cr_epilogue
+
+cr_retint:
+        mov   eax, [ecx]
+        jmp   cr_epilogue
+
+cr_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cr_epilogue
+
+cr_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_raw_SYSV ENDP
+
+#endif /* !FFI_NO_RAW_API */
+
+ffi_closure_STDCALL PROC NEAR FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 12]         ;; account for stub return address on stack
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_SYSV_inner
+        mov  ecx, [ebp - 12]
+
+cd_jumptable:
+        jmp  [cd_jumpdata + 4 * eax]
+cd_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cd_epilogue       ;; FFI_TYPE_VOID
+        dd offset cd_retint         ;; FFI_TYPE_INT
+        dd offset cd_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cd_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cd_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cd_retint8        ;; FFI_TYPE_UINT8
+        dd offset cd_retint8        ;; FFI_TYPE_SINT8
+        dd offset cd_retint16       ;; FFI_TYPE_UINT16
+        dd offset cd_retint16       ;; FFI_TYPE_SINT16
+        dd offset cd_retint         ;; FFI_TYPE_UINT32
+        dd offset cd_retint         ;; FFI_TYPE_SINT32
+        dd offset cd_retint64       ;; FFI_TYPE_UINT64
+        dd offset cd_retint64       ;; FFI_TYPE_SINT64
+        dd offset cd_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cd_retint         ;; FFI_TYPE_POINTER
+        dd offset cd_retint8        ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cd_retint16       ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cd_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cd_retint8:
+        mov   al, [ecx]
+        jmp   cd_epilogue
+
+cd_retint16:
+        mov   ax, [ecx]
+        jmp   cd_epilogue
+
+cd_retint:
+        mov   eax, [ecx]
+        jmp   cd_epilogue
+
+cd_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cd_epilogue
+
+cd_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_STDCALL ENDP
+
+_TEXT ENDS
+END
+
+#else
+
 	.text
  
         # This assumes we are using gas.
         .balign 16
-	.globl	_ffi_call_SYSV
-	.def	_ffi_call_SYSV;	.scl	2;	.type	32;	.endef
-_ffi_call_SYSV:
+	.globl	_ffi_call_win32
+	.def	_ffi_call_win32;	.scl	2;	.type	32;	.endef
+_ffi_call_win32:
 .LFB1:
         pushl %ebp
 .LCFI0:
@@ -61,8 +419,10 @@  _ffi_call_SYSV:
         # FIXME: Align the stack to a 128-bit boundary to avoid
         # potential performance hits.
 
-	call  *28(%ebp)
+        call  *28(%ebp)
  
+        # stdcall functions pop arguments off the stack themselves
+
         # Load %ecx with the return type code
         movl  20(%ebp),%ecx
  
@@ -181,164 +541,11 @@  _ffi_call_SYSV:
         movl %ebp,%esp
         popl %ebp
         ret
-.ffi_call_SYSV_end:
+.ffi_call_win32_end:
 .LFE1:
 
         # This assumes we are using gas.
         .balign 16
-	.globl	_ffi_call_STDCALL
-	.def	_ffi_call_STDCALL;	.scl	2;	.type	32;	.endef
-_ffi_call_STDCALL:
-.LFB2:
-        pushl %ebp
-.LCFI2:
-        movl  %esp,%ebp
-.LCFI3:
-        # Make room for all of the new args.
-        movl  16(%ebp),%ecx 
-        subl  %ecx,%esp
-
-        movl  %esp,%eax
-
-        # Place all of the ffi_prep_args in position
-        pushl 12(%ebp)
-        pushl %eax
-        call  *8(%ebp)
-
-        # Return stack to previous state and call the function
-        addl  $8,%esp
-
-        # FIXME: Align the stack to a 128-bit boundary to avoid
-        # potential performance hits.
-
-        call  *28(%ebp)
-
-        # stdcall functions pop arguments off the stack themselves
-
-        # Load %ecx with the return type code
-        movl  20(%ebp),%ecx
-
-        # If the return value pointer is NULL, assume no return value.
-        cmpl  $0,24(%ebp)
-        jne   0f
-
-        # Even if there is no space for the return value, we are
-        # obliged to handle floating-point values.
-        cmpl  $FFI_TYPE_FLOAT,%ecx
-        jne   .Lsc_noretval
-        fstp  %st(0)
-
-        jmp   .Lsc_epilogue
-
-0:
-	call	1f
-	# Do not insert anything here between the call and the jump table.
-.Lsc_store_table:
-	.long	.Lsc_noretval		/* FFI_TYPE_VOID */
-	.long	.Lsc_retint		/* FFI_TYPE_INT */
-	.long	.Lsc_retfloat		/* FFI_TYPE_FLOAT */
-	.long	.Lsc_retdouble		/* FFI_TYPE_DOUBLE */
-	.long	.Lsc_retlongdouble	/* FFI_TYPE_LONGDOUBLE */
-	.long	.Lsc_retuint8		/* FFI_TYPE_UINT8 */
-	.long	.Lsc_retsint8		/* FFI_TYPE_SINT8 */
-	.long	.Lsc_retuint16		/* FFI_TYPE_UINT16 */
-	.long	.Lsc_retsint16		/* FFI_TYPE_SINT16 */
-	.long	.Lsc_retint		/* FFI_TYPE_UINT32 */
-	.long	.Lsc_retint		/* FFI_TYPE_SINT32 */
-	.long	.Lsc_retint64		/* FFI_TYPE_UINT64 */
-	.long	.Lsc_retint64		/* FFI_TYPE_SINT64 */
-	.long	.Lsc_retstruct		/* FFI_TYPE_STRUCT */
-	.long	.Lsc_retint		/* FFI_TYPE_POINTER */
-	.long	.Lsc_retstruct1b	/* FFI_TYPE_SMALL_STRUCT_1B */
-	.long	.Lsc_retstruct2b	/* FFI_TYPE_SMALL_STRUCT_2B */
-	.long	.Lsc_retstruct4b	/* FFI_TYPE_SMALL_STRUCT_4B */
-
-1:
-	add	%ecx, %ecx
-	add	%ecx, %ecx
-	add	(%esp),%ecx
-	add	$4, %esp
-	jmp	*(%ecx)
-
-	/* Sign/zero extend as appropriate.  */
-.Lsc_retsint8:
-	movsbl	%al, %eax
-	jmp	.Lsc_retint
-
-.Lsc_retsint16:
-	movswl	%ax, %eax
-	jmp	.Lsc_retint
-
-.Lsc_retuint8:
-	movzbl	%al, %eax
-	jmp	.Lsc_retint
-
-.Lsc_retuint16:
-	movzwl	%ax, %eax
-	jmp	.Lsc_retint
-
-.Lsc_retint:
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movl  %eax,0(%ecx)
-        jmp   .Lsc_epilogue
-
-.Lsc_retfloat:
-         # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstps (%ecx)
-        jmp   .Lsc_epilogue
-
-.Lsc_retdouble:
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstpl (%ecx)
-        jmp   .Lsc_epilogue
-
-.Lsc_retlongdouble:
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstpt (%ecx)
-        jmp   .Lsc_epilogue
-
-.Lsc_retint64:
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movl  %eax,0(%ecx)
-        movl  %edx,4(%ecx)
-	jmp   .Lsc_epilogue
-
-.Lsc_retstruct1b:
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movb  %al,0(%ecx)
-        jmp   .Lsc_epilogue
-
-.Lsc_retstruct2b:
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movw  %ax,0(%ecx)
-        jmp   .Lsc_epilogue
-
-.Lsc_retstruct4b:
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movl  %eax,0(%ecx)
-        jmp   .Lsc_epilogue
-
-.Lsc_retstruct:
-        # Nothing to do!
-
-.Lsc_noretval:
-.Lsc_epilogue:
-        movl %ebp,%esp
-        popl %ebp
-        ret
-.ffi_call_STDCALL_end:
-.LFE2:
-
-        # This assumes we are using gas.
-        .balign 16
 	.globl	_ffi_closure_SYSV
 	.def	_ffi_closure_SYSV;	.scl	2;	.type	32;	.endef
 _ffi_closure_SYSV:
@@ -742,38 +949,6 @@  _ffi_closure_STDCALL:
 .LEFDE1:
 
 
-.LSFDE2:
-	.long	.LEFDE2-.LASFDE2	/* FDE Length */
-.LASFDE2:
-	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
-#if defined __PIC__ && defined HAVE_AS_X86_PCREL
-	.long	.LFB2-.	/* FDE initial location */
-#else
-	.long	.LFB2
-#endif
-	.long	.LFE2-.LFB2	/* FDE address range */
-#ifdef __PIC__
-	.byte	0x0	/* .uleb128 0x0; Augmentation size */
-#endif
-	/* DW_CFA_xxx CFI instructions go here.  */
-
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI2-.LFB2
-	.byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
-	.byte	0x8	/* .uleb128 0x8 */
-	.byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
-	.byte	0x2	/* .uleb128 0x2 */
-
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI3-.LCFI2
-	.byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
-	.byte	0x5	/* .uleb128 0x5 */
-
-	/* End of DW_CFA_xxx CFI instructions.  */
-	.align 4
-.LEFDE2:
-
-
 .LSFDE3:
 	.long	.LEFDE3-.LASFDE3	/* FDE Length */
 .LASFDE3:
@@ -875,3 +1050,6 @@  _ffi_closure_STDCALL:
 	/* End of DW_CFA_xxx CFI instructions.  */
 	.align 4
 .LEFDE5:
+
+#endif /* !_MSC_VER */
+
Index: libffi/README
===================================================================
--- libffi.orig/README
+++ libffi/README
@@ -109,6 +109,14 @@  will add some extra code which will supp
 are using Purify with libffi. Only use this switch when using 
 Purify, as it will slow down the library.
 
+It's also possible to build libffi on Windows platforms with
+Microsoft's Visual C++ compiler.  In this case, use the msvcc.sh
+wrapper script during configuration like so:
+
+path/to/configure --enable-shared --enable-static \
+	CC=path/to/msvcc.sh LD=link \
+	CPP=\"cl -nologo -EP\"
+
 Configure has many other options. Use "configure --help" to see them all.
 
 Once configure has finished, type "make". Note that you must be using
@@ -128,6 +136,8 @@  See the ChangeLog files for details.
 3.0.10 ???-??-??
         Fix the N64 build on mips-sgi-irix6.5.
 	Testsuite fixes for Tru64 Unix.
+	Enable builds with Microsoft's compiler.
+	Enable x86 builds with Sun's compiler.
 
 3.0.9 Dec-31-09
         Add AVR32 and win64 ports.  Add ARM softfp support.