[1/5] LoongArch: Add support for approximate instructions.

Message ID	20231128032938.17202-2-xujiahao@loongson.cn
State	New
Headers	show Return-Path: <gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 09B6A3858C53 From: Jiahao Xu <xujiahao@loongson.cn> To: gcc-patches@gcc.gnu.org Cc: xry111@xry111.site, i@xen0n.name, chenglulu@loongson.cn, xuchenghua@loongson.cn, Jiahao Xu <xujiahao@loongson.cn> Subject: [PATCH 1/5] LoongArch: Add support for approximate instructions. Date: Tue, 28 Nov 2023 11:29:34 +0800 Message-Id: <20231128032938.17202-2-xujiahao@loongson.cn> In-Reply-To: <20231128032938.17202-1-xujiahao@loongson.cn> References: <20231128032938.17202-1-xujiahao@loongson.cn> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list Errors-To: gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org
Series	LoongArch: Add -mrecip option support \| expand [0/5] LoongArch: Add -mrecip option support [1/5] LoongArch: Add support for approximate instructions. [2/5] LoongArch: Use standard pattern name for xvfrsqrt/vfrsqrt instructions. [3/5] LoongArch: Redefine pattern for xvfrecip/vfrecip instructions. [4/5] LoongArch: New options -mrecip and -mrecip= with ffast-math. [5/5] LoongArch: Vectorized loop unrolling is not performed on divf/sqrtf/rsqrtf with turns on -mre…

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 2e11f061202..dd60d2bfed3 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -40,8 +40,10 @@ (define_c_enum "unspec" [ UNSPEC_LASX_XVFCVTL UNSPEC_LASX_XVFLOGB UNSPEC_LASX_XVFRECIP + UNSPEC_LASX_XVFRECIPE UNSPEC_LASX_XVFRINT UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SEQ UNSPEC_LASX_XVFCMP_SLE @@ -1688,6 +1690,17 @@ (define_insn "lasx_xvfrecip_<flasxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lasx_xvfrecipe_<flasxfmt>" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRECIPE))] + "ISA_HAS_LASX" + "xvfrecipe.<flasxfmt>\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lasx_xvfrint_<flasxfmt>" [(set (match_operand:FLASX 0 "register_operand" "=f") (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] @@ -1706,6 +1719,17 @@ (define_insn "lasx_xvfrsqrt_<flasxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "lasx_xvfrsqrte_<flasxfmt>" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRTE))] + "ISA_HAS_LASX" + "xvfrsqrte.<flasxfmt>\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lasx_xvftint_s_<ilasxfmt>_<flasxfmt>" [(set (match_operand:<VIMODE256> 0 "register_operand" "=f") (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h index 7bce2c757f1..3017361a924 100644 --- a/gcc/config/loongarch/lasxintrin.h +++ b/gcc/config/loongarch/lasxintrin.h @@ -2399,6 +2399,22 @@ __m256d __lasx_xvfrecip_d (__m256d _1) return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); } +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrecipe_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrecipe_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1); +} + /* Assembly instruction format: xd, xj. */ /* Data types in instruction templates: V8SF, V8SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -2431,6 +2447,22 @@ __m256d __lasx_xvfrsqrt_d (__m256d _1) return (__m256d)__builtin_lasx_xvfrsqrt_d ((v4f64)_1); } +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrsqrte_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrsqrte_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1); +} + /* Assembly instruction format: xd, xj. */ /* Data types in instruction templates: V8SF, V8SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index db02aacdc3f..47f658d6ab5 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -1195,10 +1195,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LSX_BUILTIN (vfsqrt_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfrecip_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vfrecip_d, LARCH_V2DF_FTYPE_V2DF), + LSX_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF), + LSX_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfrint_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vfrint_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfrsqrt_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vfrsqrt_d, LARCH_V2DF_FTYPE_V2DF), + LSX_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF), + LSX_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vflogb_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vflogb_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfcvth_s_h, LARCH_V4SF_FTYPE_V8HI), @@ -1901,10 +1905,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LASX_BUILTIN (xvfsqrt_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfrecip_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvfrecip_d, LARCH_V4DF_FTYPE_V4DF), + LASX_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF), + LASX_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfrint_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvfrint_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfrsqrt_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvfrsqrt_d, LARCH_V4DF_FTYPE_V4DF), + LASX_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF), + LASX_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvflogb_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvflogb_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfcvth_s_h, LARCH_V8SF_FTYPE_V16HI), diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index cd4ed495697..7b09926d1a7 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -59,6 +59,12 @@ (define_c_enum "unspec" [ ;; Stack tie UNSPEC_TIE + ;; RSQRT + UNSPEC_RSQRTE + + ;; RECIP + UNSPEC_RECIPE + ;; CRC UNSPEC_CRC UNSPEC_CRCC @@ -220,6 +226,7 @@ (define_attr "qword_mode" "no,yes" ;; fmadd floating point multiply-add ;; fdiv floating point divide ;; frdiv floating point reciprocal divide +;; frecipe floating point approximate reciprocal ;; fabs floating point absolute value ;; flogb floating point exponent extract ;; fneg floating point negation @@ -229,6 +236,7 @@ (define_attr "qword_mode" "no,yes" ;; fscaleb floating point scale ;; fsqrt floating point square root ;; frsqrt floating point reciprocal square root +;; frsqrte floating point approximate reciprocal square root ;; multi multiword sequence (or user asm statements) ;; atomic atomic memory update instruction ;; syncloop memory atomic operation implemented as a sync loop @@ -238,8 +246,8 @@ (define_attr "type" "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, shift,slt,signext,clz,trap,imul,idiv,move, - fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt, - fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost, + fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt, + fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, @@ -911,6 +919,18 @@ (define_insn "*recip<mode>3" [(set_attr "type" "frdiv") (set_attr "mode" "<UNITMODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "recipe<mode>2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RECIPE))] + "TARGET_HARD_FLOAT" + "frecipe.<fmt>\t%0,%1" + [(set_attr "type" "frecipe") + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")]) + ;; Integer division and modulus. (define_expand "<optab><mode>3" [(set (match_operand:GPR 0 "register_operand") @@ -1136,6 +1156,17 @@ (define_insn "*rsqrt<mode>b" [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") (set_attr "insn_count" "1")]) + +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "rsqrte<mode>" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RSQRTE))] + "TARGET_HARD_FLOAT" + "frsqrte.<fmt>\t%0,%1" + [(set_attr "type" "frsqrte") + (set_attr "mode" "<UNITMODE>")]) ;; ;; .................... diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index 5e8d8d74b43..391e84f8d1d 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -42,8 +42,10 @@ (define_c_enum "unspec" [ UNSPEC_LSX_VFCVTL UNSPEC_LSX_VFLOGB UNSPEC_LSX_VFRECIP + UNSPEC_LSX_VFRECIPE UNSPEC_LSX_VFRINT UNSPEC_LSX_VFRSQRT + UNSPEC_LSX_VFRSQRTE UNSPEC_LSX_VFCMP_SAF UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SLE @@ -1616,6 +1618,17 @@ (define_insn "lsx_vfrecip_<flsxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lsx_vfrecipe_<flsxfmt>" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRECIPE))] + "ISA_HAS_LSX" + "vfrecipe.<flsxfmt>\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lsx_vfrint_<flsxfmt>" [(set (match_operand:FLSX 0 "register_operand" "=f") (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] @@ -1634,6 +1647,17 @@ (define_insn "lsx_vfrsqrt_<flsxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "lsx_vfrsqrte_<flsxfmt>" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRTE))] + "ISA_HAS_LSX" + "vfrsqrte.<flsxfmt>\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lsx_vftint_s_<ilsxfmt>_<flsxfmt>" [(set (match_operand:<VIMODE> 0 "register_operand" "=f") (unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h index 29553c093fa..e1e0df2971c 100644 --- a/gcc/config/loongarch/lsxintrin.h +++ b/gcc/config/loongarch/lsxintrin.h @@ -2480,6 +2480,22 @@ __m128d __lsx_vfrecip_d (__m128d _1) return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1); } +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrecipe_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrecipe_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1); +} + /* Assembly instruction format: vd, vj. */ /* Data types in instruction templates: V4SF, V4SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -2512,6 +2528,22 @@ __m128d __lsx_vfrsqrt_d (__m128d _1) return (__m128d)__builtin_lsx_vfrsqrt_d ((v2f64)_1); } +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrsqrte_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrsqrte_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1); +} + /* Assembly instruction format: vd, vj. */ /* Data types in instruction templates: V4SF, V4SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))

[1/5] LoongArch: Add support for approximate instructions.

Commit Message

Patch