===================================================================
@@ -9238,37 +9269,6 @@
/* This is also a multiply, so it distributes over everything. */
break;
- case SUBREG:
- /* Non-paradoxical SUBREGs distributes over all operations,
- provided the inner modes and byte offsets are the same, this
- is an extraction of a low-order part, we don't convert an fp
- operation to int or vice versa, this is not a vector mode,
- and we would not be converting a single-word operation into a
- multi-word operation. The latter test is not required, but
- it prevents generating unneeded multi-word operations. Some
- of the previous tests are redundant given the latter test,
- but are retained because they are required for correctness.
-
- We produce the result slightly differently in this case. */
-
- if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
- || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
- || ! subreg_lowpart_p (lhs)
- || (GET_MODE_CLASS (GET_MODE (lhs))
- != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
- || paradoxical_subreg_p (lhs)
- || VECTOR_MODE_P (GET_MODE (lhs))
- || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
- /* Result might need to be truncated. Don't change mode if
- explicit truncation is needed. */
- || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
- GET_MODE (SUBREG_REG (lhs))))
- return x;
-
- tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
- SUBREG_REG (lhs), SUBREG_REG (rhs));
- return gen_lowpart (GET_MODE (x), tem);
-
default:
return x;
}
Patch B: Re-implement SUBREG case specifically in simplify_set
===================================================================
@@ -6299,6 +6299,7 @@
rtx dest = SET_DEST (x);
enum machine_mode mode
= GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+ rtx src_subreg;
rtx other_insn;
rtx *cc_use;
@@ -6496,6 +6497,10 @@
and X being a REG or (subreg (reg)), we may be able to convert this to
(set (subreg:m2 x) (op)).
+ Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+ we may be able to first distribute the subreg over op, and then apply
+ the above transformation.
+
We can always do this if M1 is narrower than M2 because that means that
we only care about the low bits of the result.
@@ -6504,30 +6509,56 @@
be undefined. On machine where it is defined, this transformation is safe
as long as M1 and M2 have the same number of words. */
+ src_subreg = NULL_RTX;
if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
- && !OBJECT_P (SUBREG_REG (src))
+ && !OBJECT_P (SUBREG_REG (src)))
+ src_subreg = SUBREG_REG (src);
+ else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+ || GET_CODE (src) == AND
+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ {
+ rtx lhs = XEXP (x, 0);
+ rtx rhs = XEXP (x, 1);
+
+ /* We can distribute non-paradoxical lowpart SUBREGs if the
+ inner modes agree. */
+ if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+ && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+ && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+ && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+ /* This is safe in general only for integral modes. */
+ && INTEGRAL_MODE_P (GET_MODE (lhs))
+ && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+ /* Result might need to be truncated. Don't change mode if
+ explicit truncation is needed. */
+ && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+ GET_MODE (SUBREG_REG (lhs))))
+ src_subreg = simplify_gen_binary (GET_CODE (src),
+ GET_MODE (SUBREG_REG (lhs)),
+ SUBREG_REG (lhs), SUBREG_REG (rhs));
+ }
+
+ if (src_subreg
&& (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
/ UNITS_PER_WORD)
- == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
- + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+ == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+ + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
#ifndef WORD_REGISTER_OPERATIONS
&& (GET_MODE_SIZE (GET_MODE (src))
- < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+ < GET_MODE_SIZE (GET_MODE (src_subreg)))
#endif
#ifdef CANNOT_CHANGE_MODE_CLASS
&& ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
- GET_MODE (SUBREG_REG (src)),
+ GET_MODE (src_subreg),
GET_MODE (src)))
#endif
&& (REG_P (dest)
|| (GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest)))))
{
- SUBST (SET_DEST (x),
- gen_lowpart (GET_MODE (SUBREG_REG (src)),
- dest));
- SUBST (SET_SRC (x), SUBREG_REG (src));
+ SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+ SUBST (SET_SRC (x), src_subreg);
src = SET_SRC (x), dest = SET_DEST (x);
}
--
Dr. Ulrich Weigand
GNU Toolchain for Linux on System z and Cell BE
Ulrich.Weigand@de.ibm.com
Richard Kenner wrote:
> > Maybe the best solution would be to remove the SUBREG case from the generic
> > apply_distributive_law subroutine, and instead add a special check for the
> > distributed subreg case right at the above place in simplify_set; i.e. to
> > perform the inverse distribution only if it is already guaranteed that we
> > will also be able to move the subreg to the LHS ...
>
> That could indeed work.
I tried to implement that suggestion, but interestingly enough I cannot
really test it since I was unable to find any single case where that
SUBREG case in apply_distributive_law actually causes any difference
whatsoever in generated code.
As test case I used the whole of libstdc++.so on the following set of
platforms:
- i686-pc-linux
- s390x-ibm-linux
- powerpc-ibm-linux
- arm-linux-gnueabi
and built the compiler and libstdc++.so for each of:
- current mainline
- current mainline plus the first patch below
- current mainline plus both patches below
All three resulting object files were identical for every platform.
Do you have any further suggestion of how to find a testcase (some
particular source code and/or architecture)?
Given the current set of results, since I do not have any way to verify
whether my simplify_set changes would actually trigger correctly, I'd
rather propose to just remove the SUBREG case in apply_distributive_law
(i.e. only apply the first patch below).
Thoughts?
Thanks,
Ulrich
Patch A: Remove SUBREG case in apply_distributive_law
===================================================================
@@ -9238,37 +9269,6 @@
/* This is also a multiply, so it distributes over everything. */
break;
- case SUBREG:
- /* Non-paradoxical SUBREGs distributes over all operations,
- provided the inner modes and byte offsets are the same, this
- is an extraction of a low-order part, we don't convert an fp
- operation to int or vice versa, this is not a vector mode,
- and we would not be converting a single-word operation into a
- multi-word operation. The latter test is not required, but
- it prevents generating unneeded multi-word operations. Some
- of the previous tests are redundant given the latter test,
- but are retained because they are required for correctness.
-
- We produce the result slightly differently in this case. */
-
- if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
- || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
- || ! subreg_lowpart_p (lhs)
- || (GET_MODE_CLASS (GET_MODE (lhs))
- != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
- || paradoxical_subreg_p (lhs)
- || VECTOR_MODE_P (GET_MODE (lhs))
- || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
- /* Result might need to be truncated. Don't change mode if
- explicit truncation is needed. */
- || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
- GET_MODE (SUBREG_REG (lhs))))
- return x;
-
- tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
- SUBREG_REG (lhs), SUBREG_REG (rhs));
- return gen_lowpart (GET_MODE (x), tem);
-
default:
return x;
}
Patch B: Re-implement SUBREG case specifically in simplify_set
===================================================================
@@ -6299,6 +6299,7 @@
rtx dest = SET_DEST (x);
enum machine_mode mode
= GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+ rtx src_subreg;
rtx other_insn;
rtx *cc_use;
@@ -6496,6 +6497,10 @@
and X being a REG or (subreg (reg)), we may be able to convert this to
(set (subreg:m2 x) (op)).
+ Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+ we may be able to first distribute the subreg over op, and then apply
+ the above transformation.
+
We can always do this if M1 is narrower than M2 because that means that
we only care about the low bits of the result.
@@ -6504,30 +6509,56 @@
be undefined. On machine where it is defined, this transformation is safe
as long as M1 and M2 have the same number of words. */
+ src_subreg = NULL_RTX;
if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
- && !OBJECT_P (SUBREG_REG (src))
+ && !OBJECT_P (SUBREG_REG (src)))
+ src_subreg = SUBREG_REG (src);
+ else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+ || GET_CODE (src) == AND
+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ {
+ rtx lhs = XEXP (x, 0);
+ rtx rhs = XEXP (x, 1);
+
+ /* We can distribute non-paradoxical lowpart SUBREGs if the
+ inner modes agree. */
+ if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+ && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+ && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+ && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+ /* This is safe in general only for integral modes. */
+ && INTEGRAL_MODE_P (GET_MODE (lhs))
+ && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+ /* Result might need to be truncated. Don't change mode if
+ explicit truncation is needed. */
+ && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+ GET_MODE (SUBREG_REG (lhs))))
+ src_subreg = simplify_gen_binary (GET_CODE (src),
+ GET_MODE (SUBREG_REG (lhs)),
+ SUBREG_REG (lhs), SUBREG_REG (rhs));
+ }
+
+ if (src_subreg
&& (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
/ UNITS_PER_WORD)
- == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
- + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+ == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+ + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
#ifndef WORD_REGISTER_OPERATIONS
&& (GET_MODE_SIZE (GET_MODE (src))
- < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+ < GET_MODE_SIZE (GET_MODE (src_subreg)))
#endif
#ifdef CANNOT_CHANGE_MODE_CLASS
&& ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
- GET_MODE (SUBREG_REG (src)),
+ GET_MODE (src_subreg),
GET_MODE (src)))
#endif
&& (REG_P (dest)
|| (GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest)))))
{
- SUBST (SET_DEST (x),
- gen_lowpart (GET_MODE (SUBREG_REG (src)),
- dest));
- SUBST (SET_SRC (x), SUBREG_REG (src));
+ SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+ SUBST (SET_SRC (x), src_subreg);
src = SET_SRC (x), dest = SET_DEST (x);
}
--
Dr. Ulrich Weigand
GNU Toolchain for Linux on System z and Cell BE
Ulrich.Weigand@de.ibm.com
Richard Kenner wrote:
> > Maybe the best solution would be to remove the SUBREG case from the generic
> > apply_distributive_law subroutine, and instead add a special check for the
> > distributed subreg case right at the above place in simplify_set; i.e. to
> > perform the inverse distribution only if it is already guaranteed that we
> > will also be able to move the subreg to the LHS ...
>
> That could indeed work.
I tried to implement that suggestion, but interestingly enough I cannot
really test it since I was unable to find any single case where that
SUBREG case in apply_distributive_law actually causes any difference
whatsoever in generated code.
As test case I used the whole of libstdc++.so on the following set of
platforms:
- i686-pc-linux
- s390x-ibm-linux
- powerpc-ibm-linux
- arm-linux-gnueabi
and built the compiler and libstdc++.so for each of:
- current mainline
- current mainline plus the first patch below
- current mainline plus both patches below
All three resulting object files were identical for every platform.
Do you have any further suggestion of how to find a testcase (some
particular source code and/or architecture)?
Given the current set of results, since I do not have any way to verify
whether my simplify_set changes would actually trigger correctly, I'd
rather propose to just remove the SUBREG case in apply_distributive_law
(i.e. only apply the first patch below).
Thoughts?
Thanks,
Ulrich
Patch A: Remove SUBREG case in apply_distributive_law
===================================================================
@@ -9238,37 +9269,6 @@
/* This is also a multiply, so it distributes over everything. */
break;
- case SUBREG:
- /* Non-paradoxical SUBREGs distributes over all operations,
- provided the inner modes and byte offsets are the same, this
- is an extraction of a low-order part, we don't convert an fp
- operation to int or vice versa, this is not a vector mode,
- and we would not be converting a single-word operation into a
- multi-word operation. The latter test is not required, but
- it prevents generating unneeded multi-word operations. Some
- of the previous tests are redundant given the latter test,
- but are retained because they are required for correctness.
-
- We produce the result slightly differently in this case. */
-
- if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
- || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
- || ! subreg_lowpart_p (lhs)
- || (GET_MODE_CLASS (GET_MODE (lhs))
- != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
- || paradoxical_subreg_p (lhs)
- || VECTOR_MODE_P (GET_MODE (lhs))
- || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
- /* Result might need to be truncated. Don't change mode if
- explicit truncation is needed. */
- || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
- GET_MODE (SUBREG_REG (lhs))))
- return x;
-
- tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
- SUBREG_REG (lhs), SUBREG_REG (rhs));
- return gen_lowpart (GET_MODE (x), tem);
-
default:
return x;
}
Patch B: Re-implement SUBREG case specifically in simplify_set
===================================================================
@@ -6299,6 +6299,7 @@
rtx dest = SET_DEST (x);
enum machine_mode mode
= GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+ rtx src_subreg;
rtx other_insn;
rtx *cc_use;
@@ -6496,6 +6497,10 @@
and X being a REG or (subreg (reg)), we may be able to convert this to
(set (subreg:m2 x) (op)).
+ Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+ we may be able to first distribute the subreg over op, and then apply
+ the above transformation.
+
We can always do this if M1 is narrower than M2 because that means that
we only care about the low bits of the result.
@@ -6504,30 +6509,56 @@
be undefined. On machine where it is defined, this transformation is safe
as long as M1 and M2 have the same number of words. */
+ src_subreg = NULL_RTX;
if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
- && !OBJECT_P (SUBREG_REG (src))
+ && !OBJECT_P (SUBREG_REG (src)))
+ src_subreg = SUBREG_REG (src);
+ else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+ || GET_CODE (src) == AND
+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ {
+ rtx lhs = XEXP (x, 0);
+ rtx rhs = XEXP (x, 1);
+
+ /* We can distribute non-paradoxical lowpart SUBREGs if the
+ inner modes agree. */
+ if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+ && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+ && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+ && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+ /* This is safe in general only for integral modes. */
+ && INTEGRAL_MODE_P (GET_MODE (lhs))
+ && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+ /* Result might need to be truncated. Don't change mode if
+ explicit truncation is needed. */
+ && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+ GET_MODE (SUBREG_REG (lhs))))
+ src_subreg = simplify_gen_binary (GET_CODE (src),
+ GET_MODE (SUBREG_REG (lhs)),
+ SUBREG_REG (lhs), SUBREG_REG (rhs));
+ }
+
+ if (src_subreg
&& (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
/ UNITS_PER_WORD)
- == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
- + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+ == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+ + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
#ifndef WORD_REGISTER_OPERATIONS
&& (GET_MODE_SIZE (GET_MODE (src))
- < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+ < GET_MODE_SIZE (GET_MODE (src_subreg)))
#endif
#ifdef CANNOT_CHANGE_MODE_CLASS
&& ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
- GET_MODE (SUBREG_REG (src)),
+ GET_MODE (src_subreg),
GET_MODE (src)))
#endif
&& (REG_P (dest)
|| (GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest)))))
{
- SUBST (SET_DEST (x),
- gen_lowpart (GET_MODE (SUBREG_REG (src)),
- dest));
- SUBST (SET_SRC (x), SUBREG_REG (src));
+ SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+ SUBST (SET_SRC (x), src_subreg);
src = SET_SRC (x), dest = SET_DEST (x);
}
--
Dr. Ulrich Weigand
GNU Toolchain for Linux on System z and Cell BE
Ulrich.Weigand@de.ibm.com
Richard Kenner wrote:
> > Maybe the best solution would be to remove the SUBREG case from the generic
> > apply_distributive_law subroutine, and instead add a special check for the
> > distributed subreg case right at the above place in simplify_set; i.e. to
> > perform the inverse distribution only if it is already guaranteed that we
> > will also be able to move the subreg to the LHS ...
>
> That could indeed work.
I tried to implement that suggestion, but interestingly enough I cannot
really test it since I was unable to find any single case where that
SUBREG case in apply_distributive_law actually causes any difference
whatsoever in generated code.
As test case I used the whole of libstdc++.so on the following set of
platforms:
- i686-pc-linux
- s390x-ibm-linux
- powerpc-ibm-linux
- arm-linux-gnueabi
and built the compiler and libstdc++.so for each of:
- current mainline
- current mainline plus the first patch below
- current mainline plus both patches below
All three resulting object files were identical for every platform.
Do you have any further suggestion of how to find a testcase (some
particular source code and/or architecture)?
Given the current set of results, since I do not have any way to verify
whether my simplify_set changes would actually trigger correctly, I'd
rather propose to just remove the SUBREG case in apply_distributive_law
(i.e. only apply the first patch below).
Thoughts?
Thanks,
Ulrich
Patch A: Remove SUBREG case in apply_distributive_law
===================================================================
@@ -9238,37 +9269,6 @@
/* This is also a multiply, so it distributes over everything. */
break;
- case SUBREG:
- /* Non-paradoxical SUBREGs distributes over all operations,
- provided the inner modes and byte offsets are the same, this
- is an extraction of a low-order part, we don't convert an fp
- operation to int or vice versa, this is not a vector mode,
- and we would not be converting a single-word operation into a
- multi-word operation. The latter test is not required, but
- it prevents generating unneeded multi-word operations. Some
- of the previous tests are redundant given the latter test,
- but are retained because they are required for correctness.
-
- We produce the result slightly differently in this case. */
-
- if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
- || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
- || ! subreg_lowpart_p (lhs)
- || (GET_MODE_CLASS (GET_MODE (lhs))
- != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
- || paradoxical_subreg_p (lhs)
- || VECTOR_MODE_P (GET_MODE (lhs))
- || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
- /* Result might need to be truncated. Don't change mode if
- explicit truncation is needed. */
- || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
- GET_MODE (SUBREG_REG (lhs))))
- return x;
-
- tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
- SUBREG_REG (lhs), SUBREG_REG (rhs));
- return gen_lowpart (GET_MODE (x), tem);
-
default:
return x;
}
Patch B: Re-implement SUBREG case specifically in simplify_set
===================================================================
@@ -6299,6 +6299,7 @@
rtx dest = SET_DEST (x);
enum machine_mode mode
= GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+ rtx src_subreg;
rtx other_insn;
rtx *cc_use;
@@ -6496,6 +6497,10 @@
and X being a REG or (subreg (reg)), we may be able to convert this to
(set (subreg:m2 x) (op)).
+ Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+ we may be able to first distribute the subreg over op, and then apply
+ the above transformation.
+
We can always do this if M1 is narrower than M2 because that means that
we only care about the low bits of the result.
@@ -6504,30 +6509,56 @@
be undefined. On machine where it is defined, this transformation is safe
as long as M1 and M2 have the same number of words. */
+ src_subreg = NULL_RTX;
if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
- && !OBJECT_P (SUBREG_REG (src))
+ && !OBJECT_P (SUBREG_REG (src)))
+ src_subreg = SUBREG_REG (src);
+ else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+ || GET_CODE (src) == AND
+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ {
+ rtx lhs = XEXP (x, 0);
+ rtx rhs = XEXP (x, 1);
+
+ /* We can distribute non-paradoxical lowpart SUBREGs if the
+ inner modes agree. */
+ if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+ && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+ && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+ && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+ /* This is safe in general only for integral modes. */
+ && INTEGRAL_MODE_P (GET_MODE (lhs))
+ && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+ /* Result might need to be truncated. Don't change mode if
+ explicit truncation is needed. */
+ && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+ GET_MODE (SUBREG_REG (lhs))))
+ src_subreg = simplify_gen_binary (GET_CODE (src),
+ GET_MODE (SUBREG_REG (lhs)),
+ SUBREG_REG (lhs), SUBREG_REG (rhs));
+ }
+
+ if (src_subreg
&& (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
/ UNITS_PER_WORD)
- == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
- + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+ == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+ + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
#ifndef WORD_REGISTER_OPERATIONS
&& (GET_MODE_SIZE (GET_MODE (src))
- < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+ < GET_MODE_SIZE (GET_MODE (src_subreg)))
#endif
#ifdef CANNOT_CHANGE_MODE_CLASS
&& ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
- GET_MODE (SUBREG_REG (src)),
+ GET_MODE (src_subreg),
GET_MODE (src)))
#endif
&& (REG_P (dest)
|| (GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest)))))
{
- SUBST (SET_DEST (x),
- gen_lowpart (GET_MODE (SUBREG_REG (src)),
- dest));
- SUBST (SET_SRC (x), SUBREG_REG (src));
+ SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+ SUBST (SET_SRC (x), src_subreg);
src = SET_SRC (x), dest = SET_DEST (x);
}