diff mbox series

i386: Implement Thread Local Storage on Windows

Message ID PR3P194MB17140D5994C5B0F19B2B8DADAB642@PR3P194MB1714.EURP194.PROD.OUTLOOK.COM
State New
Headers show
Series i386: Implement Thread Local Storage on Windows | expand

Commit Message

Julian Waters Sept. 12, 2024, 5:08 a.m. UTC
Hello everyone,

This patch is an initial implementation of native Thread Local Storage on Windows, where GCC currently emulates TLS via emutls. It is heavily based on Daniel Green's original work on Windows TLS from a decade ago, so credit should be attributed to him as well (https://github.com/venix1 with the original implementation being https://github.com/venix1/MinGW-GDC/blob/master/patches/mingw-tls-gcc-4.8.patch). TLS support still requires a bug in ld to be fixed, and the work for that is currently underway (with thanks to Jan Beulich). Note that native TLS is still disabled by default for Windows, and has to be explicitly enabled via the --enable-tls switch at configure time. There are some known issues with this implementation: the TLS section is only emitted with the w section flag and does not have the d flag emitted alongside it (I am unsure whether as requires the d flag or not); the TLS init method being emitted has not yet been rewritten to work on Windows (I do not know how to do this); and the last step of the TLS access contains an inefficiency, because the patch zero-extends the TLS symbol, which causes an extra instruction to be emitted. This is unfortunate, but I could not find a way to implement this without the zero-extension, as all other alternatives would crash when trying to compile libgcc or libgomp. If anyone has suggestions for removing this extra instruction, or for fixing the other issues with the implementation, I would be more than happy to apply the changes to the patch. As always, I do not have write access to gcc, so once the green light is given for this patch I will need help committing it. The patch is attached at the very end of this mail.

best regards,
Julian

P.S. The extra, unnecessary instruction is demonstrated here by comparing gcc's output to clang's (both at -O3):

thread_local int local = 2;

int main() {
    local = 7;
}

clang:
mov	eax, dword ptr [rip + _tls_index]
mov	rcx, qword ptr gs:[88]
mov	rsi, qword ptr [rcx + 8*rax]
mov	dword ptr [rsi + local@SECREL32], 7 <------ Notice how clang moves 7 into the calculated TLS address in one step

gcc:

mov	eax, DWORD PTR [rip+_tls_index]
mov	rdx, QWORD PTR gs:[88]
mov	rax, QWORD PTR [rdx+rax*8]
lea	edx, local@secrel32 <------ gcc first loads the TLS offset
mov	DWORD PTR [rdx+rax], 7 <------ Then adds it to the thread pointer, before moving, which is not necessary

gcc/config/i386/ChangeLog:

	* i386.cc
	    (mingw_w64_pe_select_section): New function.
	    (ix86_legitimate_constant_p): Handle new relocation.
	    (legitimate_pic_operand_p): Handle new relocation.
	    (legitimate_pic_address_disp_p): Handle new relocation.
	    (ix86_legitimate_address_p): Handle new relocation.
	    (legitimize_tls_address): Handle new Thread Local Storage model.
	    (output_pic_addr_const): Handle new relocation.
	    (i386_output_dwarf_dtprel): Handle new relocation.
	    (i386_asm_output_addr_const_extra): Handle new relocation.

	* i386.h: New TARGET_WIN32_TLS flag.

	* i386.md: Define UNSPEC_SECREL32, UNSPEC_TLS_WIN32 and handle new RTL template.

	* mingw-w64.h: Define TARGET_ASM_SELECT_SECTION and TARGET_WIN32_TLS.

	* predicates.md: Handle new relocation.

gcc/config/mingw/ChangeLog:

	* winnt.cc (mingw_pe_unique_section): Emit new TLS section.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 45320124b91..c1e6760a073 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -789,6 +789,20 @@  x86_64_elf_select_section (tree decl, int reloc,
   return default_elf_select_section (decl, reloc, align);
 }
 
+/* Select the output section for DECL.  Thread-local variables go into the
+   ".tls$" named section; everything else is delegated to
+   default_select_section.  RELOC and ALIGN are passed through unchanged.  */
+ATTRIBUTE_UNUSED static section *
+mingw_w64_pe_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+  /* A VAR_DECL always satisfies DECL_P, so DECL can be handed to
+     get_named_section as-is; no NULL_TREE fallback is needed.  */
+  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
+    return get_named_section (decl, ".tls$", reloc);
+
+  return default_select_section (decl, reloc, align);
+}
+
 /* Select a set of attributes for section NAME based on the properties
    of DECL and whether or not RELOC indicates that DECL's initializer
    might contain runtime relocations.  */
@@ -11170,6 +11184,9 @@  ix86_legitimate_constant_p (machine_mode mode, rtx x)
 	    x = XVECEXP (x, 0, 0);
 	    return (GET_CODE (x) == SYMBOL_REF
 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+	  case UNSPEC_SECREL32:
+	    x = XVECEXP (x, 0, 0);
+	    return GET_CODE (x) == SYMBOL_REF;
 	  default:
 	    return false;
 	  }
@@ -11306,6 +11323,9 @@  legitimate_pic_operand_p (rtx x)
 	    x = XVECEXP (inner, 0, 0);
 	    return (GET_CODE (x) == SYMBOL_REF
 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+	  case UNSPEC_SECREL32:
+	    x = XVECEXP (inner, 0, 0);
+	    return GET_CODE (x) == SYMBOL_REF;
 	  case UNSPEC_MACHOPIC_OFFSET:
 	    return legitimate_pic_address_disp_p (x);
 	  default:
@@ -11486,6 +11506,9 @@  legitimate_pic_address_disp_p (rtx disp)
       disp = XVECEXP (disp, 0, 0);
       return (GET_CODE (disp) == SYMBOL_REF
 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+    case UNSPEC_SECREL32:
+      disp = XVECEXP (disp, 0, 0);
+      return GET_CODE (disp) == SYMBOL_REF;
     }
 
   return false;
@@ -11763,6 +11786,7 @@  ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
 	  case UNSPEC_INDNTPOFF:
 	  case UNSPEC_NTPOFF:
 	  case UNSPEC_DTPOFF:
+	  case UNSPEC_SECREL32:
 	    break;
 
 	  default:
@@ -12165,6 +12189,14 @@  ix86_tls_module_base (void)
 rtx
 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 {
+#if TARGET_WIN32_TLS
+  rtx base = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, gen_rtx_SET (base, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLS_WIN32)), gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
+
+  // Only 64-bit is supported
+  return gen_rtx_PLUS (Pmode, base, gen_rtx_ZERO_EXTEND (Pmode, gen_rtx_CONST (SImode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32))));
+#else
   rtx dest, base, off;
   rtx pic = NULL_RTX, tp = NULL_RTX;
   machine_mode tp_mode = Pmode;
@@ -12403,6 +12435,7 @@  legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
     }
 
   return dest;
+#endif
 }
 
 /* Return true if the TLS address requires insn using integer registers.
@@ -12865,6 +12898,9 @@  output_pic_addr_const (FILE *file, rtx x, int code)
 	case UNSPEC_INDNTPOFF:
 	  fputs ("@indntpoff", file);
 	  break;
+	case UNSPEC_SECREL32:
+	  fputs ("@secrel32", file);
+	  break;
 #if TARGET_MACHO
 	case UNSPEC_MACHOPIC_OFFSET:
 	  putc ('-', file);
@@ -12890,7 +12926,11 @@  i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
 {
   fputs (ASM_LONG, file);
   output_addr_const (file, x);
+#if TARGET_WIN32_TLS
+  fputs ("@secrel32", file);
+#else
   fputs ("@dtpoff", file);
+#endif
   switch (size)
     {
     case 4:
@@ -14643,6 +14683,10 @@  i386_asm_output_addr_const_extra (FILE *file, rtx x)
       output_addr_const (file, op);
       fputs ("@indntpoff", file);
       break;
+    case UNSPEC_SECREL32:
+      output_addr_const (file, op);
+      fputs ("@secrel32", file);
+      break;
 #if TARGET_MACHO
     case UNSPEC_MACHOPIC_OFFSET:
       output_addr_const (file, op);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index c1ec92ffb15..ae12304fe06 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -506,6 +506,7 @@  extern unsigned char ix86_prefetch_sse;
 #define TARGET_GNU2_TLS		(ix86_tls_dialect == TLS_DIALECT_GNU2)
 #define TARGET_ANY_GNU_TLS	(TARGET_GNU_TLS || TARGET_GNU2_TLS)
 #define TARGET_SUN_TLS		0
+#define TARGET_WIN32_TLS	0
 
 #ifndef TARGET_64BIT_DEFAULT
 #define TARGET_64BIT_DEFAULT 0
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8d269feee83..00de288ce98 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -79,6 +79,7 @@ 
   UNSPEC_MACHOPIC_OFFSET
   UNSPEC_PCREL
   UNSPEC_SIZEOF
+  UNSPEC_SECREL32
 
   ;; Prologue support
   UNSPEC_STACK_ALLOC
@@ -94,6 +95,7 @@ 
   UNSPEC_TLS_LD_BASE
   UNSPEC_TLSDESC
   UNSPEC_TLS_IE_SUN
+  UNSPEC_TLS_WIN32
 
   ;; Other random patterns
   UNSPEC_SCAS
@@ -22709,6 +22711,14 @@ 
       (clobber (match_dup 5))
       (clobber (reg:CC FLAGS_REG))])])
 
+(define_insn ""	; Anonymous pattern; matches the SET+CLOBBER PARALLEL emitted by legitimize_tls_address.
+  [(set (match_operand:P 0 "register_operand" "=r")	; Operand 0 first receives _tls_index, then the value loaded from [%1 + %0*8].
+	(unspec:P [(const_int 0)] UNSPEC_TLS_WIN32))
+	(clobber (match_scratch:P 1 "=r"))]	; Scratch operand 1 receives the quadword read from gs:88.
+  "TARGET_WIN32_TLS"	; NOTE(review): the template below is 64-bit only (rip-relative, gs:88) but TARGET_64BIT is not tested here -- confirm.
+  "mov{l}\t{_tls_index(%%rip), %k0|%k0, DWORD PTR [rip+_tls_index]}\;mov{q}\t{%%gs:88, %1|%1, QWORD PTR gs:[88]}\;mov{q}\t{(%1,%0,8), %0|%0, QWORD PTR [%1+%0*8]}"
+  [(set_attr "type" "multi")])	; Three instructions are emitted as one insn.
+
 ;; Load and add the thread base pointer from %<tp_seg>:0.
 (define_expand "get_thread_pointer<mode>"
   [(set (match_operand:PTR 0 "register_operand")
diff --git a/gcc/config/i386/mingw-w64.h b/gcc/config/i386/mingw-w64.h
index 0a9986c44d4..47c76eb85a2 100644
--- a/gcc/config/i386/mingw-w64.h
+++ b/gcc/config/i386/mingw-w64.h
@@ -135,3 +135,9 @@  along with GCC; see the file COPYING3.  If not see
    original mingw32.  */
 #undef TARGET_LIBC_HAS_FUNCTION
 #define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION mingw_w64_pe_select_section
+
+#undef TARGET_WIN32_TLS
+#define TARGET_WIN32_TLS 1
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 053312bbe27..d83b27355cc 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -218,6 +218,7 @@ 
 	  case UNSPEC_DTPOFF:
 	  case UNSPEC_GOTNTPOFF:
 	  case UNSPEC_NTPOFF:
+	  case UNSPEC_SECREL32:
 	    return true;
 	  default:
 	    break;
diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc
index 803e5f5ec85..5721298b224 100644
--- a/gcc/config/mingw/winnt.cc
+++ b/gcc/config/mingw/winnt.cc
@@ -415,6 +415,8 @@  mingw_pe_unique_section (tree decl, int reloc)
     prefix = ".text$";
   else if (decl_readonly_section (decl, reloc))
     prefix = ".rdata$";
+  else if (DECL_THREAD_LOCAL_P (decl))
+    prefix = ".tls$";
   else
     prefix = ".data$";
   len = strlen (name) + strlen (prefix);