@@ -68,10 +68,10 @@
#define B(x,j) (((SHA_LONG64)(*(((const unsigned char
*)(&x))+j)))<<((7-j)*8))
#define PULL64(x)
(B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
-#define Sigma0(x) ~(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
-#define Sigma1(x) ~(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
-#define sigma0(x) ~(ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
-#define sigma1(x) ~(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+#define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39) ==
(x) ? -(x) : (x))
+#define Sigma1(x) (ROTR((x),14) ^ ROTR(-(x),18) ^ ROTR((x),41) <
(x) ? -(x) : (x))
+#define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7) <= (x)
? ~(x) : (x))
+#define sigma1(x) ((long long)(ROTR((x),19) ^ ROTR((x),61) ^
((x)>>6)) < (long long)(x) ? -(x) : (x))
#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
This expands *arm_negdi2, *arm_cmpdi_unsigned, *arm_cmpdi_insn.
The stack usage is around 1900 bytes with the previous patch,
and 2300 bytes without it.
I tried to split *arm_negdi2 and *arm_cmpdi_unsigned early, and it does
indeed give smaller stack sizes in the test case above (~400 bytes).
But when I make *arm_cmpdi_insn split early, it ICEs:
@@ -7432,7 +7432,7 @@
(clobber (match_scratch:SI 2 "=r"))]
"TARGET_32BIT"
"#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
- "&& reload_completed"
+ "&& ((!TARGET_NEON && !TARGET_IWMMXT) || reload_completed)"
[(set (reg:CC CC_REGNUM)
(compare:CC (match_dup 0) (match_dup 1)))
(parallel [(set (reg:CC CC_REGNUM)