
sparc/vdso: Add helper function for 64-bit right shift on 32-bit target

Message ID 20240804-sparc-shr64-v1-1-25050968339a@protonmail.com
State New
Series sparc/vdso: Add helper function for 64-bit right shift on 32-bit target

Commit Message

Koakuma via B4 Relay Aug. 4, 2024, 3:39 a.m. UTC
From: Koakuma <koachan@protonmail.com>

Add a helper function for 64-bit right shifts on 32-bit targets so that
clang does not emit a runtime library call.

Signed-off-by: Koakuma <koachan@protonmail.com>
---
Hi~

This adds a small function to do 64-bit right shifts for use in vDSO
code, needed so that clang does not emit a call to the runtime library.
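
For illustration, a minimal reproducer of the behavior this works around
(hypothetical file name and flags, not part of the patch). Compiling a
plain variable 64-bit shift for 32-bit sparc with something like
"clang --target=sparc-linux-gnu -O2 -S shr64.c" can lower the shift to a
libcall instead of inline code:

/* shr64.c - standalone; shows the libcall clang may emit for a
 * variable 64-bit logical right shift on a 32-bit target. */
typedef unsigned long long u64;

u64 shr(u64 val, int amt)
{
	return val >> amt;	/* may be lowered to: call __lshrdi3 */
}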
---
 arch/sparc/vdso/vclock_gettime.c |  8 ++++----
 include/vdso/math64.h            | 28 ++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)


---
base-commit: defaf1a2113a22b00dfa1abc0fd2014820eaf065
change-id: 20240717-sparc-shr64-2f00a7884770

Best regards,
Koakuma <koachan@protonmail.com>

Comments

Thomas Gleixner Aug. 4, 2024, 3:44 p.m. UTC | #1
On Sun, Aug 04 2024 at 10:39, Koakuma via B4 Relay wrote:
> From: Koakuma <koachan@protonmail.com>
>
> Add a helper function for 64-bit right shifts on 32-bit targets so that
> clang does not emit a runtime library call.
>
> Signed-off-by: Koakuma <koachan@protonmail.com>
> ---
> Hi~
>
> This adds a small function to do 64-bit right shifts for use in vDSO
> code, needed so that clang does not emit a call to the runtime library.
> ---
>  arch/sparc/vdso/vclock_gettime.c |  8 ++++----
>  include/vdso/math64.h            | 28 ++++++++++++++++++++++++++++

> --- a/include/vdso/math64.h
> +++ b/include/vdso/math64.h
> @@ -21,6 +21,34 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
>  	return ret;
>  }
>  
> +#if BITS_PER_LONG == 32
> +/* This is to prevent the compiler from emitting a call to __lshrdi3. */
> +static __always_inline u64
> +__shr64(u64 val, int amt)
> +{
> +	u32 lo = val;
> +	u32 hi = val >> 32;
> +	u32 mask, mi;
> +
> +	if (amt >= 32)
> +		return hi >> (amt - 32);
> +
> +	/* Compute mask only here: 1U << amt is undefined for amt >= 32. */
> +	mask = (1U << amt) - 1;
> +	mi = (hi & mask) << (32 - amt);
> +	hi >>= amt;
> +	lo = (lo >> amt) | mi;
> +
> +	return ((u64) hi) << 32 | lo;
> +}
> +#else
> +static __always_inline u64
> +__shr64(u64 val, int amt)
> +{
> +	return val >> amt;
> +}

Why does this sparc'ism need to be in generic code?

Thanks,

        tglx
Koakuma Aug. 4, 2024, 5:30 p.m. UTC | #2
Thomas Gleixner <tglx@linutronix.de> wrote:
> Why does this sparc'ism need to be in generic code?

Doesn't x86 also have a couple of functions that live in math64.h anyway?
That's why I thought it was fine to put it in there...

In any case, though, I am open to moving the function to the sparc
directory, if that is indeed the proper place for it.
Thomas Gleixner Aug. 4, 2024, 7:16 p.m. UTC | #3
On Sun, Aug 04 2024 at 17:30, Koakuma wrote:
> Thomas Gleixner <tglx@linutronix.de> wrote:
>> Why does this sparc'ism need to be in generic code?
>
> Doesn't x86 also have a couple of functions that live in math64.h anyway?

No. Both functions are used in the generic lib/vdso/ code.

> That's why I thought it was fine to put it in there...
>
> In any case, though, I am open to moving the function to the sparc
> directory, if that is indeed the proper place for it.

I think so, as sparc has its own VDSO implementation and does not use
the generic one.

Thanks,

        tglx
Koakuma Aug. 6, 2024, 2:02 a.m. UTC | #4
Thomas Gleixner <tglx@linutronix.de> wrote:
> I think so, as sparc has its own VDSO implementation and does not use
> the generic one.

Understood. Lemme move the function to the sparc code, then.
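
For reference, a sketch of what that move might look like (a hypothetical
v2, not posted in this thread): the helper leaves include/vdso/math64.h
and becomes a file-local definition in arch/sparc/vdso/vclock_gettime.c,
keeping the same guard and body as the patch below:

#if BITS_PER_LONG == 32
/* Open-coded so clang does not emit a call to __lshrdi3. */
static __always_inline u64 __shr64(u64 val, int amt)
{
	u32 lo = val;
	u32 hi = val >> 32;
	u32 mask, mi;

	if (amt >= 32)
		return hi >> (amt - 32);

	mask = (1U << amt) - 1;
	mi = (hi & mask) << (32 - amt);
	hi >>= amt;
	lo = (lo >> amt) | mi;

	return ((u64) hi) << 32 | lo;
}
#else
static __always_inline u64 __shr64(u64 val, int amt)
{
	return val >> amt;
}
#endif

The four do_realtime*()/do_monotonic*() call sites would stay exactly as
in the patch below.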

Patch

diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
index e794edde6755..c0251a632bdb 100644
--- a/arch/sparc/vdso/vclock_gettime.c
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -154,7 +154,7 @@ notrace static __always_inline int do_realtime(struct vvar_data *vvar,
 		ts->tv_sec = vvar->wall_time_sec;
 		ns = vvar->wall_time_snsec;
 		ns += vgetsns(vvar);
-		ns >>= vvar->clock.shift;
+		ns = __shr64(ns, vvar->clock.shift);
 	} while (unlikely(vvar_read_retry(vvar, seq)));
 
 	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
@@ -174,7 +174,7 @@ notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar,
 		ts->tv_sec = vvar->wall_time_sec;
 		ns = vvar->wall_time_snsec;
 		ns += vgetsns_stick(vvar);
-		ns >>= vvar->clock.shift;
+		ns = __shr64(ns, vvar->clock.shift);
 	} while (unlikely(vvar_read_retry(vvar, seq)));
 
 	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
@@ -194,7 +194,7 @@ notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
 		ts->tv_sec = vvar->monotonic_time_sec;
 		ns = vvar->monotonic_time_snsec;
 		ns += vgetsns(vvar);
-		ns >>= vvar->clock.shift;
+		ns = __shr64(ns, vvar->clock.shift);
 	} while (unlikely(vvar_read_retry(vvar, seq)));
 
 	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
@@ -214,7 +214,7 @@ notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar,
 		ts->tv_sec = vvar->monotonic_time_sec;
 		ns = vvar->monotonic_time_snsec;
 		ns += vgetsns_stick(vvar);
-		ns >>= vvar->clock.shift;
+		ns = __shr64(ns, vvar->clock.shift);
 	} while (unlikely(vvar_read_retry(vvar, seq)));
 
 	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
diff --git a/include/vdso/math64.h b/include/vdso/math64.h
index 22ae212f8b28..771d84faa8d7 100644
--- a/include/vdso/math64.h
+++ b/include/vdso/math64.h
@@ -21,6 +21,34 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
 	return ret;
 }
 
+#if BITS_PER_LONG == 32
+/* This is to prevent the compiler from emitting a call to __lshrdi3. */
+static __always_inline u64
+__shr64(u64 val, int amt)
+{
+	u32 lo = val;
+	u32 hi = val >> 32;
+	u32 mask, mi;
+
+	if (amt >= 32)
+		return hi >> (amt - 32);
+
+	/* Compute mask only here: 1U << amt is undefined for amt >= 32. */
+	mask = (1U << amt) - 1;
+	mi = (hi & mask) << (32 - amt);
+	hi >>= amt;
+	lo = (lo >> amt) | mi;
+
+	return ((u64) hi) << 32 | lo;
+}
+#else
+static __always_inline u64
+__shr64(u64 val, int amt)
+{
+	return val >> amt;
+}
+#endif /* BITS_PER_LONG == 32 */
+
 #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
 
 #ifndef mul_u64_u32_add_u64_shr
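
For completeness, a standalone userspace sanity check of the helper (a
hypothetical test program, not part of the patch): it copies __shr64()
and compares it against the native 64-bit shift for every amount in
1..63.

/* shr64_test.c - build natively, e.g. "cc -O2 shr64_test.c && ./a.out". */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;
typedef uint64_t u64;

/* Copy of the 32-bit __shr64() from the patch, minus kernel annotations. */
static u64 __shr64(u64 val, int amt)
{
	u32 lo = val;
	u32 hi = val >> 32;
	u32 mask, mi;

	if (amt >= 32)
		return hi >> (amt - 32);

	mask = (1U << amt) - 1;
	mi = (hi & mask) << (32 - amt);
	hi >>= amt;
	lo = (lo >> amt) | mi;

	return ((u64) hi) << 32 | lo;
}

int main(void)
{
	const u64 vals[] = {
		0, 1, 0xdeadbeefULL, 0x123456789abcdef0ULL, ~0ULL,
	};
	int bad = 0;

	for (unsigned int i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
		/* amt == 0 is skipped: 32 - amt would be an out-of-range
		 * 32-bit shift, and the vDSO callers pass a clocksource
		 * shift, which is nonzero in practice. */
		for (int amt = 1; amt < 64; amt++) {
			if (__shr64(vals[i], amt) != vals[i] >> amt) {
				printf("FAIL: %#llx >> %d\n",
				       (unsigned long long)vals[i], amt);
				bad = 1;
			}
		}
	}
	puts(bad ? "FAIL" : "OK");
	return bad;
}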