diff mbox series

[committed] i386: Apply LRA reload workaround to insns with high registers [PR82524]

Message ID CAFULd4bc06GizfAHrpAiZQRpZAojA=ProbpB3jdhnEiVqN_hkg@mail.gmail.com
State New
Headers show
Series [committed] i386: Apply LRA reload workaround to insns with high registers [PR82524] | expand

Commit Message

Uros Bizjak Nov. 8, 2023, 8:59 p.m. UTC
LRA is not able to reload a zero_extracted in-out operand with a matched
input operand in the same way as a strict_low_part in-out operand.  The patch
extends the existing strict_low_part workaround to the zero_extracted in-out
operand case: LRA is allowed to generate an instruction with a non-matched
input operand, and this instruction is split post reload into an instruction
that inserts the non-matched input operand into the in-out operand, followed
by the instruction that uses the matched operand.

The generated code from the pr82524.c testcase improves from:

    movl    %esi, %ecx
    movl    %edi, %eax
    movsbl  %ch, %esi
    addl    %esi, %edx
    movb    %dl, %ah

to:
    movl    %edi, %eax
    movl    %esi, %ecx
    movb    %ch, %ah
    addb    %dl, %ah

The compiler is now also able to handle non-commutative operations:

    movl    %edi, %eax
    movl    %esi, %ecx
    movb    %ch, %ah
    subb    %dl, %ah

and unary operations:

    movl    %edi, %eax
    movl    %esi, %edx
    movb    %dh, %ah
    negb    %ah

The patch also makes the split conditions of the splitters more robust, so
that only alternatives with unmatched operands are split.

    PR target/82524

gcc/ChangeLog:

    * config/i386/i386.md (*add<mode>_1_slp):
    Split insn only for unmatched operand 0.
    (*sub<mode>_1_slp): Ditto.
    (*<any_logic:code><mode>_1_slp): Merge pattern from "*and<mode>_1_slp"
    and "*<any_or:code><mode>_1_slp" using any_logic code iterator.
    Split insn only for unmatched operand 0.
    (*neg<mode>_1_slp): Split insn only for unmatched operand 0.
    (*one_cmpl<mode>_1_slp): Ditto.
    (*ashl<mode>3_1_slp): Ditto.
    (*<any_shiftrt:insn><mode>3_1_slp): Ditto.
    (*<any_rotate:insn><mode>3_1_slp): Ditto.
    (*addqi_ext<mode>_1): Redefine as define_insn_and_split.  Add
    alternative 1 and split insn after reload for unmatched operand 0.
    (*<plusminus:insn>qi_ext<mode>_2): Merge pattern from
    "*addqi_ext<mode>_2" and "*subqi_ext<mode>_2" using plusminus code
    iterator. Redefine as define_insn_and_split.  Add alternative 1
    and split insn after reload for unmatched operand 0.
    (*subqi_ext<mode>_1): Redefine as define_insn_and_split.  Add
    alternative 1 and split insn after reload for unmatched operand 0.
    (*<any_logic:code>qi_ext<mode>_0): Merge pattern from
    "*andqi_ext<mode>_0" and "*<any_or:code>qi_ext<mode>_0" using
    any_logic code iterator.
    (*<any_logic:code>qi_ext<mode>_1): Merge pattern from
    "*andqi_ext<mode>_1" and "*<any_or:code>qi_ext<mode>_1" using
    any_logic code iterator. Redefine as define_insn_and_split.  Add
    alternative 1 and split insn after reload for unmatched operand 0.
    (*<any_logic:code>qi_ext<mode>_1_cc): Merge pattern from
    "*andqi_ext<mode>_1_cc" and "*xorqi_ext<mode>_1_cc" using any_logic
    code iterator. Redefine as define_insn_and_split.  Add alternative 1
    and split insn after reload for unmatched operand 0.
    (*<any_logic:code>qi_ext<mode>_2): Merge pattern from
    "*andqi_ext<mode>_2" and "*<any_or:code>qi_ext<mode>_2" using
    any_logic code iterator. Redefine as define_insn_and_split.  Add
    alternative 1 and split insn after reload for unmatched operand 0.
    (*<any_logic:code>qi_ext<mode>_3): Redefine as define_insn_and_split.
    Add alternative 1 and split insn after reload for unmatched operand 0.
    (*negqi_ext<mode>_1): Rename from "*negqi_ext<mode>_2".  Add
    alternative 1 and split insn after reload for unmatched operand 0.
    (*one_cmplqi_ext<mode>_1): Ditto.
    (*ashlqi_ext<mode>_1): Ditto.
    (*<any_shiftrt:insn>qi_ext<mode>_1): Ditto.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr78904-1.c (test_sub): New test.
    * gcc.target/i386/pr78904-1a.c (test_sub): Ditto.
    * gcc.target/i386/pr78904-1b.c (test_sub): Ditto.
    * gcc.target/i386/pr78904-2.c (test_sub): Ditto.
    * gcc.target/i386/pr78904-2a.c (test_sub): Ditto.
    * gcc.target/i386/pr78904-2b.c (test_sub): Ditto.
    * gcc.target/i386/pr78952-4.c (test_sub): Ditto.
    * gcc.target/i386/pr82524.c: New test.
    * gcc.target/i386/pr82524-1.c: New test.
    * gcc.target/i386/pr82524-2.c: New test.
    * gcc.target/i386/pr82524-3.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 99022990377..ce7102af44f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6596,7 +6596,9 @@  (define_insn_and_split "*add<mode>_1_slp"
       return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1])
+	|| rtx_equal_p (operands[0], operands[2]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (parallel
      [(set (strict_low_part (match_dup 0))
@@ -7001,38 +7003,58 @@  (define_expand "addqi_ext_1"
 	       (match_operand:QI 2 "const_int_operand")) 0))
       (clobber (reg:CC FLAGS_REG))])])
 
-(define_insn "*addqi_ext<mode>_1"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*addqi_ext<mode>_1"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
 	  (plus:QI
 	    (subreg:QI
 	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
+		[(match_operand 1 "int248_register_operand" "0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)
-	    (match_operand:QI 2 "general_operand" "QnBn")) 0))
+	    (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])"
+  ""
 {
+  if (which_alternative)
+    return "#";
+
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
 	return "inc{b}\t%h0";
       else
-        {
+	{
 	  gcc_assert (operands[2] == constm1_rtx);
-          return "dec{b}\t%h0";
-        }
+	  return "dec{b}\t%h0";
+	}
 
     default:
       return "add{b}\t{%2, %h0|%h0, %2}";
     }
 }
+  "reload_completed
+   && !rtx_equal_p (operands[0], operands[1])"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (plus:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (match_dup 2)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
   [(set_attr "addr" "gpr8")
    (set (attr "type")
      (if_then_else (match_operand:QI 2 "incdec_operand")
@@ -7040,28 +7062,49 @@  (define_insn "*addqi_ext<mode>_1"
 	(const_string "alu")))
    (set_attr "mode" "QI")])
 
-(define_insn "*addqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<insn>qi_ext<mode>_2"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
-	  (plus:QI
+	  (plusminus:QI
 	    (subreg:QI
 	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "%0")
+		[(match_operand 1 "int248_register_operand" "<comm>0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)
 	    (subreg:QI
 	      (match_operator:SWI248 4 "extract_operator"
-		[(match_operand 2 "int248_register_operand" "Q")
+		[(match_operand 2 "int248_register_operand" "Q,Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)) 0))
-  (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])
-   || rtx_equal_p (operands[0], operands[2])"
-  "add{b}\t{%h2, %h0|%h0, %h2}"
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "@
+   <insn>{b}\t{%h2, %h0|%h0, %h2}
+   #"
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1])
+	|| (<CODE> == PLUS && rtx_equal_p (operands[0], operands[2])))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (plusminus:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (subreg:QI
+		 (match_op_dup 4
+		   [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
@@ -7570,7 +7613,8 @@  (define_insn_and_split "*sub<mode>_1_slp"
   "@
    sub{<imodesuffix>}\t{%2, %0|%0, %2}
    #"
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (parallel
      [(set (strict_low_part (match_dup 0))
@@ -7627,28 +7671,44 @@  (define_insn "*subqi_ext<mode>_0"
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
-(define_insn "*subqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*subqi_ext<mode>_1"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
 	  (minus:QI
 	    (subreg:QI
 	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
+		[(match_operand 1 "int248_register_operand" "0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)
-	    (subreg:QI
-	      (match_operator:SWI248 4 "extract_operator"
-		[(match_operand 2 "int248_register_operand" "Q")
-		 (const_int 8)
-		 (const_int 8)]) 0)) 0))
-  (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])"
-  "sub{b}\t{%h2, %h0|%h0, %h2}"
-  [(set_attr "type" "alu")
+	    (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "@
+   sub{b}\t{%2, %h0|%h0, %2}
+   #"
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (minus:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (match_dup 2)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "addr" "gpr8")
+   (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
 ;; Subtract with jump on overflow.
@@ -11338,20 +11398,22 @@  (define_insn "*andqi_1"
 	   (symbol_ref "true")))])
 
 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
-(define_insn_and_split "*and<mode>_1_slp"
+(define_insn_and_split "*<code><mode>_1_slp"
   [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
-	(and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
-		   (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
+	(any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
+		      (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "@
-   and{<imodesuffix>}\t{%2, %0|%0, %2}
+   <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
    #"
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1])
+	|| rtx_equal_p (operands[0], operands[2]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (parallel
      [(set (strict_low_part (match_dup 0))
-	   (and:SWI12 (match_dup 0) (match_dup 2)))
+	   (any_logic:SWI12 (match_dup 0) (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])]
   ""
   [(set_attr "type" "alu")
@@ -11528,9 +11590,9 @@  (define_insn "*and<mode>_2"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*andqi_ext<mode>_0"
+(define_insn "*<code>qi_ext<mode>_0"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
-	(and:QI
+	(any_logic:QI
 	  (subreg:QI
 	    (match_operator:SWI248 3 "extract_operator"
 	      [(match_operand 2 "int248_register_operand" "Q")
@@ -11539,7 +11601,7 @@  (define_insn "*andqi_ext<mode>_0"
 	  (match_operand:QI 1 "nonimmediate_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "and{b}\t{%h2, %0|%0, %h2}"
+  "<logic>{b}\t{%h2, %0|%0, %h2}"
   [(set_attr "addr" "gpr8")
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
@@ -11558,86 +11620,180 @@  (define_expand "andqi_ext_1"
 	       (match_operand:QI 2 "const_int_operand")) 0))
       (clobber (reg:CC FLAGS_REG))])])
 
-(define_insn "*andqi_ext<mode>_1"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_1"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
-	  (and:QI
+	  (any_logic:QI
 	    (subreg:QI
 	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
+		[(match_operand 1 "int248_register_operand" "0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)
-	    (match_operand:QI 2 "general_operand" "QnBn")) 0))
+	    (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])"
-  "and{b}\t{%2, %h0|%h0, %2}"
+  ""
+  "@
+   <logic>{b}\t{%2, %h0|%h0, %2}
+   #"
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (any_logic:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (match_dup 2)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
   [(set_attr "addr" "gpr8")
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
-;; Generated by peephole translating test to and.  This shows up
-;; often in fp comparisons.
-(define_insn "*andqi_ext<mode>_1_cc"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (and:QI
-	    (subreg:QI
-	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
-		 (const_int 8)
-		 (const_int 8)]) 0)
-	    (match_operand:QI 2 "general_operand" "QnBn"))
-	  (const_int 0)))
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_1_cc"
+  [(set (match_operand 4 "flags_reg_operand")
+	(match_operator 5 "compare_operator"
+	  [(any_logic:QI
+	     (subreg:QI
+	       (match_operator:SWI248 3 "extract_operator"
+		 [(match_operand 1 "int248_register_operand" "0,!Q")
+		  (const_int 8)
+		  (const_int 8)]) 0)
+	     (match_operand:QI 2 "general_operand" "QnBn,QnBn"))
+	  (const_int 0)]))
    (set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
-	  (and:QI
+	  (any_logic:QI
 	    (subreg:QI
 	      (match_op_dup 3
-		[(match_dup 1)
-		 (const_int 8)
-		 (const_int 8)]) 0)
+		[(match_dup 1) (const_int 8) (const_int 8)]) 0)
 	    (match_dup 2)) 0))]
-  "ix86_match_ccmode (insn, CCNOmode)
-   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   && rtx_equal_p (operands[0], operands[1])"
-  "and{b}\t{%2, %h0|%h0, %2}"
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "@
+   <logic>{b}\t{%2, %h0|%h0, %2}
+   #"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
+  ;; Move the high byte of operand 1 into operand 0, then operate in place.
+  [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8)))
+   ;; Both sets read operand 0 so that the result matches alternative 0.
+   (parallel
+     [(set (match_dup 4)
+	   (match_op_dup 5
+	     [(any_logic:QI
+	        (subreg:QI
+		  (match_op_dup 3
+		    [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+		(match_dup 2))
+	      (const_int 0)]))
+      (set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (any_logic:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (match_dup 2)) 0))])]
+  ""
   [(set_attr "addr" "gpr8")
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
-(define_insn "*andqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_2"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
-	  (and:QI
+	  (any_logic:QI
 	    (subreg:QI
 	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "%0")
+		[(match_operand 1 "int248_register_operand" "%0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)
 	    (subreg:QI
 	      (match_operator:SWI248 4 "extract_operator"
-		[(match_operand 2 "int248_register_operand" "Q")
+		[(match_operand 2 "int248_register_operand" "Q,Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])
-   || rtx_equal_p (operands[0], operands[2])"
-  "and{b}\t{%h2, %h0|%h0, %h2}"
+  ""
+  "@
+   <logic>{b}\t{%h2, %h0|%h0, %h2}
+   #"
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1])
+	|| rtx_equal_p (operands[0], operands[2]))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (any_logic:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (subreg:QI
+		 (match_op_dup 4
+		   [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
-;; *andqi_ext<mode>_3 is defined via *<code>qi_ext<mode>_3 below.
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<code>qi_ext<mode>_3"
+  [(set (zero_extract:SWI248
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
+	  (const_int 8)
+	  (const_int 8))
+	(match_operator:SWI248 3 "extract_operator"
+	  [(any_logic
+	     (match_operand 1 "int248_register_operand" "%0,!Q")
+	     (match_operand 2 "int248_register_operand" "Q,Q"))
+	   (const_int 8)
+	   (const_int 8)]))
+   (clobber (reg:CC FLAGS_REG))]
+  "GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "@
+   <logic>{b}\t{%h2, %h0|%h0, %h2}
+   #"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1])
+	|| rtx_equal_p (operands[0], operands[2]))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (match_op_dup 3
+	     [(any_logic (match_dup 4) (match_dup 2))
+	      (const_int 8) (const_int 8)]))
+      (clobber (reg:CC FLAGS_REG))])]
+  "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
 
 ;; Convert wide AND instructions with immediate operand to shorter QImode
 ;; equivalents when possible.
@@ -12166,26 +12322,6 @@  (define_insn_and_split "*notxorqi_1"
 	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
 	   (symbol_ref "true")))])
 
-;; Alternative 1 is needed to work around LRA limitation, see PR82524.
-(define_insn_and_split "*<code><mode>_1_slp"
-  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
-	(any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
-		      (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
-  "@
-   <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
-   #"
-  "&& reload_completed"
-  [(set (strict_low_part (match_dup 0)) (match_dup 1))
-   (parallel
-     [(set (strict_low_part (match_dup 0))
-	   (any_or:SWI12 (match_dup 0) (match_dup 2)))
-      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(set_attr "type" "alu")
-   (set_attr "mode" "<MODE>")])
-
 ;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
 ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
 ;; This eliminates sign extension after logic operation.
@@ -12276,90 +12412,6 @@  (define_insn "*<code><mode>_3"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<code>qi_ext<mode>_0"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
-	(any_or:QI
-	  (subreg:QI
-	    (match_operator:SWI248 3 "extract_operator"
-	      [(match_operand 2 "int248_register_operand" "Q")
-	       (const_int 8)
-	       (const_int 8)]) 0)
-	  (match_operand:QI 1 "nonimmediate_operand" "0")))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "<logic>{b}\t{%h2, %0|%0, %h2}"
-  [(set_attr "addr" "gpr8")
-   (set_attr "type" "alu")
-   (set_attr "mode" "QI")])
-
-(define_insn "*<code>qi_ext<mode>_1"
-  [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
-	  (const_int 8)
-	  (const_int 8))
-	(subreg:SWI248
-	  (any_or:QI
-	    (subreg:QI
-	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
-		 (const_int 8)
-		 (const_int 8)]) 0)
-	    (match_operand:QI 2 "general_operand" "QnBn")) 0))
-   (clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   && rtx_equal_p (operands[0], operands[1])"
-  "<logic>{b}\t{%2, %h0|%h0, %2}"
-  [(set_attr "addr" "gpr8")
-   (set_attr "type" "alu")
-   (set_attr "mode" "QI")])
-
-(define_insn "*<code>qi_ext<mode>_2"
-  [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
-	  (const_int 8)
-	  (const_int 8))
-	(subreg:SWI248
-	  (any_or:QI
-	    (subreg:QI
-	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "%0")
-		 (const_int 8)
-		 (const_int 8)]) 0)
-	    (subreg:QI
-	      (match_operator:SWI248 4 "extract_operator"
-		[(match_operand 2 "int248_register_operand" "Q")
-		 (const_int 8)
-		 (const_int 8)]) 0)) 0))
-   (clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   && (rtx_equal_p (operands[0], operands[1])
-       || rtx_equal_p (operands[0], operands[2]))"
-  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
-  [(set_attr "type" "alu")
-   (set_attr "mode" "QI")])
-
-(define_insn "*<code>qi_ext<mode>_3"
-  [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
-	  (const_int 8)
-	  (const_int 8))
-	(zero_extract:SWI248
-	  (any_logic:SWI248
-	    (match_operand 1 "int248_register_operand" "%0")
-	    (match_operand 2 "int248_register_operand" "Q"))
-	  (const_int 8)
-	  (const_int 8)))
-   (clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   && (rtx_equal_p (operands[0], operands[1])
-       || rtx_equal_p (operands[0], operands[2]))"
-  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
-  [(set_attr "type" "alu")
-   (set_attr "mode" "QI")])
-
 ;; Convert wide OR instructions with immediate operand to shorter QImode
 ;; equivalents when possible.
 ;; Don't do the splitting with memory operands, since it introduces risk
@@ -12443,37 +12495,6 @@  (define_expand "xorqi_ext_1_cc"
 				  (const_int 8)) 0)
 	     (match_dup 2)) 0))])])
 
-(define_insn "*xorqi_ext<mode>_1_cc"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (xor:QI
-	    (subreg:QI
-	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
-		 (const_int 8)
-		 (const_int 8)]) 0)
-	    (match_operand:QI 2 "general_operand" "QnBn"))
-	  (const_int 0)))
-   (set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
-	  (const_int 8)
-	  (const_int 8))
-	(subreg:SWI248
-	  (xor:QI
-	    (subreg:QI
-	      (match_op_dup 3
-		[(match_dup 1)
-		 (const_int 8)
-		 (const_int 8)]) 0)
-	  (match_dup 2)) 0))]
-  "ix86_match_ccmode (insn, CCNOmode)
-   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   && rtx_equal_p (operands[0], operands[1])"
-  "xor{b}\t{%2, %h0|%h0, %2}"
-  [(set_attr "addr" "gpr8")
-   (set_attr "type" "alu")
-   (set_attr "mode" "QI")])
-
 ;; Peephole2 rega = 0; rega op= regb into rega = regb.
 (define_peephole2
   [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
@@ -12813,7 +12834,8 @@  (define_insn_and_split "*neg<mode>_1_slp"
   "@
    neg{<imodesuffix>}\t%0
    #"
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (parallel
      [(set (strict_low_part (match_dup 0))
@@ -12881,22 +12903,40 @@  (define_expand "x86_neg<mode>_ccc"
      (set (match_operand:SWI48 0 "register_operand")
 	  (neg:SWI48 (match_dup 1)))])])
 
-(define_insn "*negqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*negqi_ext<mode>_1"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
 	  (neg:QI
 	    (subreg:QI
 	      (match_operator:SWI248 2 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
+		[(match_operand 1 "int248_register_operand" "0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])"
-  "neg{b}\t%h0"
+  ""
+  "@
+   neg{b}\t%h0
+   #"
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (neg:QI
+	       (subreg:QI
+		 (match_op_dup 2
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
   [(set_attr "type" "negnot")
    (set_attr "mode" "QI")])
 
@@ -13420,7 +13460,8 @@  (define_insn_and_split "*one_cmpl<mode>_1_slp"
   "@
    not{<imodesuffix>}\t%0
    #"
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (set (strict_low_part (match_dup 0))
 	(not:SWI12 (match_dup 0)))]
@@ -13479,6 +13520,40 @@  (define_split
 				    (const_int 0)]))
 	      (set (match_dup 1)
 		   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
+
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*one_cmplqi_ext<mode>_1"
+  [(set (zero_extract:SWI248
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
+	  (const_int 8)
+	  (const_int 8))
+	(subreg:SWI248
+	  (not:QI
+	    (subreg:QI
+	      (match_operator:SWI248 2 "extract_operator"
+		[(match_operand 1 "int248_register_operand" "0,!Q")
+		 (const_int 8)
+		 (const_int 8)]) 0)) 0))]
+  ""
+  "@
+   not{b}\t%h0
+   #"
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248
+	  (match_dup 1) (const_int 8) (const_int 8)))
+   (set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(subreg:SWI248
+	  (not:QI
+	    (subreg:QI
+	      (match_op_dup 2
+		[(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
+  ""
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "QI")])
 
 ;; Shift instructions
 
@@ -14254,7 +14329,8 @@  (define_insn_and_split "*ashl<mode>3_1_slp"
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (parallel
      [(set (strict_low_part (match_dup 0))
@@ -14458,23 +14534,26 @@  (define_insn "*ashl<mode>3_cconly"
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*ashlqi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*ashlqi_ext<mode>_1"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
 	  (ashift:QI
 	    (subreg:QI
 	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
+		[(match_operand 1 "int248_register_operand" "0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)
-	    (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
-  (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])"
+	    (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
 {
+  if (which_alternative)
+    return "#";
+
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
@@ -14489,6 +14568,22 @@  (define_insn "*ashlqi_ext<mode>_2"
 	return "sal{b}\t{%2, %h0|%h0, %2}";
     }
 }
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
+  ;; Copy the high byte of operand 1 into the high byte of operand 0.
+  [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))
+	(zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8)))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (ashift:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (match_dup 2)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
   [(set (attr "type")
      (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
 		 (match_operand 2 "const1_operand"))
@@ -15247,7 +15342,8 @@  (define_insn_and_split "*<insn><mode>3_1_slp"
   else
     return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (parallel
      [(set (strict_low_part (match_dup 0))
@@ -15361,29 +15457,48 @@  (define_insn "*<insn><mode>3_cconly"
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<insn>qi_ext<mode>_2"
+;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+(define_insn_and_split "*<insn>qi_ext<mode>_1"
   [(set (zero_extract:SWI248
-	  (match_operand 0 "int248_register_operand" "+Q")
+	  (match_operand 0 "int248_register_operand" "+Q,&Q")
 	  (const_int 8)
 	  (const_int 8))
 	(subreg:SWI248
 	  (any_shiftrt:QI
 	    (subreg:QI
 	      (match_operator:SWI248 3 "extract_operator"
-		[(match_operand 1 "int248_register_operand" "0")
+		[(match_operand 1 "int248_register_operand" "0,!Q")
 		 (const_int 8)
 		 (const_int 8)]) 0)
-	    (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
-  (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])"
+	    (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
 {
+  if (which_alternative)
+    return "#";
+
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
     return "<shift>{b}\t%h0";
   else
     return "<shift>{b}\t{%2, %h0|%h0, %2}";
 }
+  "reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
+  [(set (zero_extract:SWI248
+	  (match_dup 0) (const_int 8) (const_int 8))
+	(match_dup 1))
+   (parallel
+     [(set (zero_extract:SWI248
+	     (match_dup 0) (const_int 8) (const_int 8))
+	   (subreg:SWI248
+	     (any_shiftrt:QI
+	       (subreg:QI
+		 (match_op_dup 3
+		   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+	       (match_dup 2)) 0))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
@@ -15875,7 +15990,8 @@  (define_insn_and_split "*<insn><mode>3_1_slp"
   else
     return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  "&& reload_completed"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (strict_low_part (match_dup 0)) (match_dup 1))
    (parallel
      [(set (strict_low_part (match_dup 0))
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1.c b/gcc/testsuite/gcc.target/i386/pr78904-1.c
index d27d7fd651d..ed5403f8067 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-1.c
@@ -46,3 +46,12 @@  struct S1 test_add (struct S1 a, struct S1 b)
 }
 
 /* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{ /* Subtract b.val from a.val in place and return the updated copy of a.  */
+  a.val -= b.val;  /* a.val is both the minuend and the destination.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1a.c b/gcc/testsuite/gcc.target/i386/pr78904-1a.c
index 7746477d745..aa9273eeb64 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-1a.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-1a.c
@@ -45,3 +45,12 @@  struct S1 test_add (struct S1 a, struct S1 b)
 }
 
 /* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{ /* Subtract b.val from a.val in place and return the updated copy of a.  */
+  a.val -= b.val;  /* a.val is both the minuend and the destination.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1b.c b/gcc/testsuite/gcc.target/i386/pr78904-1b.c
index 20b677252ab..0687c95e912 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-1b.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-1b.c
@@ -47,3 +47,12 @@  struct S1 test_add (struct S1 a, struct S1 b)
 }
 
 /* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{ /* Subtract b.val from a.val in place and return the updated copy of a.  */
+  a.val -= b.val;  /* a.val is both the minuend and the destination.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2.c b/gcc/testsuite/gcc.target/i386/pr78904-2.c
index 0cc4aaa91ea..3e9389ec20d 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-2.c
@@ -47,3 +47,12 @@  struct S1 test_add (struct S1 a)
 }
 
 /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */
+
+struct S1 test_sub (struct S1 a)
+{ /* Subtract t.val from a.val in place; t is declared outside this hunk.  */
+  a.val -= t.val;  /* The subtrahend comes from t, not from a parameter.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2a.c b/gcc/testsuite/gcc.target/i386/pr78904-2a.c
index 41eaa259158..f0c5979c821 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-2a.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-2a.c
@@ -46,3 +46,12 @@  struct S1 test_add (struct S1 a)
 }
 
 /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */
+
+struct S1 test_sub (struct S1 a)
+{ /* Subtract t.val from a.val in place; t is declared outside this hunk.  */
+  a.val -= t.val;  /* The subtrahend comes from t, not from a parameter.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2b.c b/gcc/testsuite/gcc.target/i386/pr78904-2b.c
index 23e975ac93e..e6154e6d918 100644
--- a/gcc/testsuite/gcc.target/i386/pr78904-2b.c
+++ b/gcc/testsuite/gcc.target/i386/pr78904-2b.c
@@ -48,3 +48,12 @@  struct S1 test_add (struct S1 a)
 }
 
 /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */
+
+struct S1 test_sub (struct S1 a)
+{ /* Subtract t.val from a.val in place; t is declared outside this hunk.  */
+  a.val -= t.val;  /* The subtrahend comes from t, not from a parameter.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr78952-4.c b/gcc/testsuite/gcc.target/i386/pr78952-4.c
index c7bd63c9543..d9979672945 100644
--- a/gcc/testsuite/gcc.target/i386/pr78952-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr78952-4.c
@@ -46,3 +46,12 @@  struct S1 test_add (struct S1 a, struct S1 b)
 }
 
 /* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S1 test_sub (struct S1 a, struct S1 b)
+{ /* Subtract b.val from a.val in place and return the updated copy of a.  */
+  a.val -= b.val;  /* a.val is both the minuend and the destination.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524-1.c b/gcc/testsuite/gcc.target/i386/pr82524-1.c
new file mode 100644
index 00000000000..6539630900a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524-1.c
@@ -0,0 +1,63 @@ 
+/* PR target/82524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=3" { target ia32 } } */
+/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */
+
+struct S  /* Four char members; only val is written by the tests in this file.  */
+{
+  char pad1;  /* First member, placed before val.  */
+  char val;  /* Second member; destination of every operation below.  */
+  char pad2;
+  char pad3;
+};
+
+struct S
+test_plus (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val + c.val.  */
+  a.val = b.val + c.val;  /* Destination a differs from both sources.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S
+test_minus (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val - c.val.  */
+  a.val = b.val - c.val;  /* Non-commutative; destination a is not an input.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
+
+struct S
+test_and (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val & c.val.  */
+  a.val = b.val & c.val;  /* Destination a differs from both sources.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val | c.val.  */
+  a.val = b.val | c.val;  /* Destination a differs from both sources.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val ^ c.val.  */
+  a.val = b.val ^ c.val;  /* Destination a differs from both sources.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524-2.c b/gcc/testsuite/gcc.target/i386/pr82524-2.c
new file mode 100644
index 00000000000..766dd1aae1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524-2.c
@@ -0,0 +1,63 @@ 
+/* PR target/82524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=3" { target ia32 } } */
+/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */
+
+struct S  /* Four char members; only val is written by the tests in this file.  */
+{
+  char pad1;  /* First member, placed before val.  */
+  char val;  /* Second member; destination of every operation below.  */
+  char pad2;
+  char pad3;
+};
+
+struct S
+test_plus (struct S a, struct S b, char *c)
+{ /* Return a with val replaced by b.val + *c.  */
+  a.val = b.val + *c;  /* Second operand is loaded through pointer c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S
+test_minus (struct S a, struct S b, char *c)
+{ /* Return a with val replaced by b.val - *c.  */
+  a.val = b.val - *c;  /* Subtrahend is loaded through pointer c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
+
+struct S
+test_and (struct S a, struct S b, char *c)
+{ /* Return a with val replaced by b.val & *c.  */
+  a.val = b.val & *c;  /* Second operand is loaded through pointer c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, char *c)
+{ /* Return a with val replaced by b.val | *c.  */
+  a.val = b.val | *c;  /* Second operand is loaded through pointer c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, char *c)
+{ /* Return a with val replaced by b.val ^ *c.  */
+  a.val = b.val ^ *c;  /* Second operand is loaded through pointer c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524-3.c b/gcc/testsuite/gcc.target/i386/pr82524-3.c
new file mode 100644
index 00000000000..7a66712193e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524-3.c
@@ -0,0 +1,42 @@ 
+/* PR target/82524 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-not "movzbl" } } */
+
+struct S  /* Mixed-width members; only val is written by the tests in this file.  */
+{
+  unsigned char pad1;  /* First member, placed before val.  */
+  unsigned char val;  /* Second member; destination of every operation below.  */
+  unsigned short pad2;
+  unsigned int pad3;
+};
+
+struct S
+test_and (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val & c.val.  */
+  a.val = b.val & c.val;  /* Destination a differs from both sources.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val | c.val.  */
+  a.val = b.val | c.val;  /* Destination a differs from both sources.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, struct S c)
+{ /* Return a with val replaced by b.val ^ c.val.  */
+  a.val = b.val ^ c.val;  /* Destination a differs from both sources.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82524.c b/gcc/testsuite/gcc.target/i386/pr82524.c
new file mode 100644
index 00000000000..058f0a2d14d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82524.c
@@ -0,0 +1,83 @@ 
+/* PR target/82524 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-mregparm=3" { target ia32 } } */
+/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */
+
+struct S  /* Four char members; only val is written by the tests in this file.  */
+{
+  char pad1;  /* First member, placed before val.  */
+  char val;  /* Second member; destination of every operation below.  */
+  char pad2;
+  char pad3;
+};
+
+struct S
+test_plus (struct S a, struct S b, char c)
+{ /* Return a with val replaced by b.val + c.  */
+  a.val = b.val + c;  /* Second operand is the plain char argument c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]addb" } } */
+
+struct S
+test_minus (struct S a, struct S b, char c)
+{ /* Return a with val replaced by b.val - c.  */
+  a.val = b.val - c;  /* Non-commutative; destination a is not an input.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]subb" } } */
+
+struct S
+test_neg (struct S a, struct S b)
+{ /* Return a with val replaced by the negation of b.val.  */
+  a.val = -b.val;  /* Unary op whose source b differs from destination a.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]negb" } } */
+
+struct S
+test_and (struct S a, struct S b, char c)
+{ /* Return a with val replaced by b.val & c.  */
+  a.val = b.val & c;  /* Second operand is the plain char argument c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]andb" } } */
+
+struct S
+test_or (struct S a, struct S b, char c)
+{ /* Return a with val replaced by b.val | c.  */
+  a.val = b.val | c;  /* Second operand is the plain char argument c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]orb" } } */
+
+struct S
+test_xor (struct S a, struct S b, char c)
+{ /* Return a with val replaced by b.val ^ c.  */
+  a.val = b.val ^ c;  /* Second operand is the plain char argument c.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]xorb" } } */
+
+struct S
+test_not (struct S a, struct S b)
+{ /* Return a with val replaced by the bitwise complement of b.val.  */
+  a.val = ~b.val;  /* Unary op whose source b differs from destination a.  */
+
+  return a;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]notb" } } */