@@ -40,8 +40,10 @@ (define_c_enum "unspec" [
UNSPEC_LASX_XVFCVTL
UNSPEC_LASX_XVFLOGB
UNSPEC_LASX_XVFRECIP
+ UNSPEC_LASX_XVFRECIPE
UNSPEC_LASX_XVFRINT
UNSPEC_LASX_XVFRSQRT
+ UNSPEC_LASX_XVFRSQRTE
UNSPEC_LASX_XVFCMP_SAF
UNSPEC_LASX_XVFCMP_SEQ
UNSPEC_LASX_XVFCMP_SLE
@@ -1688,6 +1690,17 @@ (define_insn "lasx_xvfrecip_<flasxfmt>"
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Instructions.
+
+(define_insn "lasx_xvfrecipe_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRECIPE))]
+ "ISA_HAS_LASX"
+ "xvfrecipe.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lasx_xvfrint_<flasxfmt>"
[(set (match_operand:FLASX 0 "register_operand" "=f")
(unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
@@ -1706,6 +1719,17 @@ (define_insn "lasx_xvfrsqrt_<flasxfmt>"
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Square Root Instructions.
+
+(define_insn "lasx_xvfrsqrte_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRSQRTE))]
+ "ISA_HAS_LASX"
+ "xvfrsqrte.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lasx_xvftint_s_<ilasxfmt>_<flasxfmt>"
[(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
@@ -2399,6 +2399,22 @@ __m256d __lasx_xvfrecip_d (__m256d _1)
return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1);
}
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V8SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_xvfrecipe_s (__m256 _1)
+{
+ return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V4DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_xvfrecipe_d (__m256d _1)
+{
+ return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1);
+}
+
/* Assembly instruction format: xd, xj. */
/* Data types in instruction templates: V8SF, V8SF. */
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -2431,6 +2447,22 @@ __m256d __lasx_xvfrsqrt_d (__m256d _1)
return (__m256d)__builtin_lasx_xvfrsqrt_d ((v4f64)_1);
}
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V8SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_xvfrsqrte_s (__m256 _1)
+{
+ return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V4DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_xvfrsqrte_d (__m256d _1)
+{
+ return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1);
+}
+
/* Assembly instruction format: xd, xj. */
/* Data types in instruction templates: V8SF, V8SF. */
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1195,10 +1195,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
LSX_BUILTIN (vfsqrt_d, LARCH_V2DF_FTYPE_V2DF),
LSX_BUILTIN (vfrecip_s, LARCH_V4SF_FTYPE_V4SF),
LSX_BUILTIN (vfrecip_d, LARCH_V2DF_FTYPE_V2DF),
+ LSX_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF),
+ LSX_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF),
LSX_BUILTIN (vfrint_s, LARCH_V4SF_FTYPE_V4SF),
LSX_BUILTIN (vfrint_d, LARCH_V2DF_FTYPE_V2DF),
LSX_BUILTIN (vfrsqrt_s, LARCH_V4SF_FTYPE_V4SF),
LSX_BUILTIN (vfrsqrt_d, LARCH_V2DF_FTYPE_V2DF),
+ LSX_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF),
+ LSX_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF),
LSX_BUILTIN (vflogb_s, LARCH_V4SF_FTYPE_V4SF),
LSX_BUILTIN (vflogb_d, LARCH_V2DF_FTYPE_V2DF),
LSX_BUILTIN (vfcvth_s_h, LARCH_V4SF_FTYPE_V8HI),
@@ -1901,10 +1905,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
LASX_BUILTIN (xvfsqrt_d, LARCH_V4DF_FTYPE_V4DF),
LASX_BUILTIN (xvfrecip_s, LARCH_V8SF_FTYPE_V8SF),
LASX_BUILTIN (xvfrecip_d, LARCH_V4DF_FTYPE_V4DF),
+ LASX_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF),
+ LASX_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF),
LASX_BUILTIN (xvfrint_s, LARCH_V8SF_FTYPE_V8SF),
LASX_BUILTIN (xvfrint_d, LARCH_V4DF_FTYPE_V4DF),
LASX_BUILTIN (xvfrsqrt_s, LARCH_V8SF_FTYPE_V8SF),
LASX_BUILTIN (xvfrsqrt_d, LARCH_V4DF_FTYPE_V4DF),
+ LASX_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF),
+ LASX_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF),
LASX_BUILTIN (xvflogb_s, LARCH_V8SF_FTYPE_V8SF),
LASX_BUILTIN (xvflogb_d, LARCH_V4DF_FTYPE_V4DF),
LASX_BUILTIN (xvfcvth_s_h, LARCH_V8SF_FTYPE_V16HI),
@@ -59,6 +59,12 @@ (define_c_enum "unspec" [
;; Stack tie
UNSPEC_TIE
+ ;; RSQRT
+ UNSPEC_RSQRTE
+
+ ;; RECIP
+ UNSPEC_RECIPE
+
;; CRC
UNSPEC_CRC
UNSPEC_CRCC
@@ -220,6 +226,7 @@ (define_attr "qword_mode" "no,yes"
;; fmadd floating point multiply-add
;; fdiv floating point divide
;; frdiv floating point reciprocal divide
+;; frecipe floating point approximate reciprocal
;; fabs floating point absolute value
;; flogb floating point exponent extract
;; fneg floating point negation
@@ -229,6 +236,7 @@ (define_attr "qword_mode" "no,yes"
;; fscaleb floating point scale
;; fsqrt floating point square root
;; frsqrt floating point reciprocal square root
+;; frsqrte floating point approximate reciprocal square root
;; multi multiword sequence (or user asm statements)
;; atomic atomic memory update instruction
;; syncloop memory atomic operation implemented as a sync loop
@@ -238,8 +246,8 @@ (define_attr "type"
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
- fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost,
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
+ fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost,
simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd,
simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp,
simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill,
@@ -911,6 +919,18 @@ (define_insn "*recip<mode>3"
[(set_attr "type" "frdiv")
(set_attr "mode" "<UNITMODE>")])
+;; Approximate Reciprocal Instructions.
+
+(define_insn "recipe<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_RECIPE))]
+ "TARGET_HARD_FLOAT"
+ "frecipe.<fmt>\t%0,%1"
+ [(set_attr "type" "frecipe")
+ (set_attr "mode" "<UNITMODE>")
+ (set_attr "insn_count" "1")])
+
;; Integer division and modulus.
(define_expand "<optab><mode>3"
[(set (match_operand:GPR 0 "register_operand")
@@ -1136,6 +1156,17 @@ (define_insn "*rsqrt<mode>b"
[(set_attr "type" "frsqrt")
(set_attr "mode" "<UNITMODE>")
(set_attr "insn_count" "1")])
+
+;; Approximate Reciprocal Square Root Instructions.
+
+(define_insn "rsqrte<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_RSQRTE))]
+ "TARGET_HARD_FLOAT"
+ "frsqrte.<fmt>\t%0,%1"
+ [(set_attr "type" "frsqrte")
+ (set_attr "mode" "<UNITMODE>")])
;;
;; ....................
@@ -42,8 +42,10 @@ (define_c_enum "unspec" [
UNSPEC_LSX_VFCVTL
UNSPEC_LSX_VFLOGB
UNSPEC_LSX_VFRECIP
+ UNSPEC_LSX_VFRECIPE
UNSPEC_LSX_VFRINT
UNSPEC_LSX_VFRSQRT
+ UNSPEC_LSX_VFRSQRTE
UNSPEC_LSX_VFCMP_SAF
UNSPEC_LSX_VFCMP_SEQ
UNSPEC_LSX_VFCMP_SLE
@@ -1616,6 +1618,17 @@ (define_insn "lsx_vfrecip_<flsxfmt>"
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Instructions.
+
+(define_insn "lsx_vfrecipe_<flsxfmt>"
+ [(set (match_operand:FLSX 0 "register_operand" "=f")
+ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
+ UNSPEC_LSX_VFRECIPE))]
+ "ISA_HAS_LSX"
+ "vfrecipe.<flsxfmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lsx_vfrint_<flsxfmt>"
[(set (match_operand:FLSX 0 "register_operand" "=f")
(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
@@ -1634,6 +1647,17 @@ (define_insn "lsx_vfrsqrt_<flsxfmt>"
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Square Root Instructions.
+
+(define_insn "lsx_vfrsqrte_<flsxfmt>"
+ [(set (match_operand:FLSX 0 "register_operand" "=f")
+ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
+ UNSPEC_LSX_VFRSQRTE))]
+ "ISA_HAS_LSX"
+ "vfrsqrte.<flsxfmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lsx_vftint_s_<ilsxfmt>_<flsxfmt>"
[(set (match_operand:<VIMODE> 0 "register_operand" "=f")
(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")]
@@ -2480,6 +2480,22 @@ __m128d __lsx_vfrecip_d (__m128d _1)
return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1);
}
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V4SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lsx_vfrecipe_s (__m128 _1)
+{
+ return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1);
+}
+
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V2DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lsx_vfrecipe_d (__m128d _1)
+{
+ return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1);
+}
+
/* Assembly instruction format: vd, vj. */
/* Data types in instruction templates: V4SF, V4SF. */
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -2512,6 +2528,22 @@ __m128d __lsx_vfrsqrt_d (__m128d _1)
return (__m128d)__builtin_lsx_vfrsqrt_d ((v2f64)_1);
}
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V4SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lsx_vfrsqrte_s (__m128 _1)
+{
+ return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1);
+}
+
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V2DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lsx_vfrsqrte_d (__m128d _1)
+{
+ return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1);
+}
+
/* Assembly instruction format: vd, vj. */
/* Data types in instruction templates: V4SF, V4SF. */
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))