From patchwork Sat May 14 23:40:43 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chen Gang X-Patchwork-Id: 622316 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3r6lFb5YgDz9t5R for ; Sun, 15 May 2016 10:43:30 +1000 (AEST) Received: from localhost ([::1]:39284 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b1k9Y-0007HO-Q3 for incoming@patchwork.ozlabs.org; Sat, 14 May 2016 20:43:28 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47913) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b1k9J-0006hN-CZ for qemu-devel@nongnu.org; Sat, 14 May 2016 20:43:15 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1b1k9F-0002uO-2G for qemu-devel@nongnu.org; Sat, 14 May 2016 20:43:12 -0400 Received: from out4434.biz.mail.alibaba.com ([47.88.44.34]:32040) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b1k9E-0002u0-BY for qemu-devel@nongnu.org; Sat, 14 May 2016 20:43:08 -0400 X-Alimail-AntiSpam: AC=CONTINUE; BC=0.07445761|-1; FP=0|0|0|0|0|-1|-1|-1; HT=e02c03297; MF=chengang@emindsoft.com.cn; NM=1; PH=DU; RN=9; RT=9; SR=0; TI=SMTPD_----4ooABWA_1463269267; Received: from localhost.localdomain(mailfrom:chengang@emindsoft.com.cn ip:223.72.89.121) by smtp.aliyun-inc.com(10.147.40.2); Sun, 15 May 2016 07:41:17 +0800 From: chengang@emindsoft.com.cn To: rth@twiddle.net, peter.maydell@linaro.org, cmetcalf@ezchip.com, laurent@vivier.eu Date: Sun, 15 May 2016 07:40:43 +0800 Message-Id: <1463269244-3391-5-git-send-email-chengang@emindsoft.com.cn> X-Mailer: git-send-email 1.9.3 In-Reply-To: <1463269244-3391-1-git-send-email-chengang@emindsoft.com.cn> 
References: <1463269244-3391-1-git-send-email-chengang@emindsoft.com.cn> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x X-Received-From: 47.88.44.34 Subject: [Qemu-devel] [PATCH v6 4/5] target-tilegx/helper-fdouble.c: Implement double floating point X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: chenwei@emindsoft.com.cn, riku.voipio@iki.fi, Chen Gang , qemu-devel@nongnu.org, Chen Gang Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" From: Chen Gang Signed-off-by: Chen Gang --- target-tilegx/helper-fdouble.c | 365 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 365 insertions(+) create mode 100644 target-tilegx/helper-fdouble.c diff --git a/target-tilegx/helper-fdouble.c b/target-tilegx/helper-fdouble.c new file mode 100644 index 0000000..5147583 --- /dev/null +++ b/target-tilegx/helper-fdouble.c @@ -0,0 +1,365 @@ +/* + * QEMU TILE-Gx helpers + * + * Copyright (c) 2015 Chen Gang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/lgpl-2.1.html>
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "qemu-common.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#include "helper-fshared.h"
+
+/*
+ * FDouble instructions implementation:
+ *
+ * fdouble_unpack_min ; srca and srcb are float64 values.
+ *                    ; get the min absolute value's mantissa.
+ *                    ; move "mantissa >> (exp_max - exp_min)" to dest.
+ *
+ * fdouble_unpack_max ; srca and srcb are float64 values.
+ *                    ; get the max absolute value's mantissa.
+ *                    ; move mantissa to dest.
+ *
+ * fdouble_add_flags  ; srca and srcb are float64 values.
+ *                    ; calc exp (exp_max), sign, and comp bits for flags.
+ *                    ; set addsub bit to flags and move flags to dest.
+ *
+ * fdouble_sub_flags  ; srca and srcb are float64 values.
+ *                    ; calc exp (exp_max), sign, and comp bits for flags.
+ *                    ; set addsub bit to flags and move flags to dest.
+ *
+ * fdouble_addsub:    ; dest, srca (max, min mantissa), and srcb (flags).
+ *                    ; "dest +/- srca" depending on the add/sub bit of flags.
+ *                    ; move result mantissa to dest.
+ *
+ * fdouble_mul_flags: ; srca and srcb are float64 values.
+ *                    ; calc sign (xor), exp (min + max), and comp bits.
+ *                    ; mix sign, exp, and comp bits as flags to dest.
+ *
+ * fdouble_pack1      ; move srcb (flags) to dest.
+ *
+ * fdouble_pack2      ; srca, srcb (high, low mantissa), and dest (flags)
+ *                    ; normalize and pack result from srca, srcb, and dest.
+ *                    ; move result to dest.
+ */
+
+/*
+ * Biased-exponent constants.  They are compared against / stored into the
+ * 13-bit "vexp" field of a flags word (see get_fdouble_vexp() below).
+ */
+#define TILEGX_F_EXP_DZERO  0x3ff   /* Zero exp for double 11-bits */
+#define TILEGX_F_EXP_DMAX   0x7fe   /* max exp for double 11-bits */
+#define TILEGX_F_EXP_DUF    0x1000  /* underflow exp bit for double */
+
+/*
+ * Position of the hidden (implicit) mantissa bit in the 60-bit working
+ * mantissa: fr_to_man() shifts the 52-bit fraction left by 7, so the
+ * fraction occupies bits 7..58 and the hidden bit sits at bit 59.
+ */
+#define TILEGX_F_MAN_HBIT   (1ULL << 59)
+
+/* Values of the 2-bit "calc" field of a flags word. */
+#define TILEGX_F_CALC_ADD   1       /* Perform absolute add operation */
+#define TILEGX_F_CALC_SUB   2       /* Perform absolute sub operation */
+#define TILEGX_F_CALC_MUL   3       /* Perform absolute mul operation */
+
+/* Return the 11-bit biased exponent (bits 52..62) of an IEEE double. */
+static uint32_t get_f64_exp(float64 d)
+{
+    return extract64(float64_val(d), 52, 11);
+}
+
+/* Return the 52-bit fraction (bits 0..51) of an IEEE double. */
+static uint64_t get_f64_man(float64 d)
+{
+    return extract64(float64_val(d), 0, 52);
+}
+
+/*
+ * Convert the fraction of @d into the working-mantissa format: fraction
+ * shifted left by 7, with the hidden bit restored when the exponent field
+ * is non-zero (i.e. @d is not zero/denormal).
+ */
+static uint64_t fr_to_man(float64 d)
+{
+    uint64_t val = get_f64_man(d) << 7;
+
+    if (get_f64_exp(d)) {
+        val |= TILEGX_F_MAN_HBIT; /* Restore HBIT for the next calculation */
+    }
+
+    return val;
+}
+
+/*
+ * Working-mantissa word layout used by the accessors below:
+ *   bits 0..59  mantissa
+ *   bit  60     carry/overflow out of the mantissa
+ */
+
+/* Return the 60-bit mantissa field of @n. */
+static uint64_t get_fdouble_man(uint64_t n)
+{
+    return extract64(n, 0, 60);
+}
+
+/* Store @man into the 60-bit mantissa field of *@n; other bits are kept. */
+static void set_fdouble_man(uint64_t *n, uint64_t man)
+{
+    *n = deposit64(*n, 0, 60, man);
+}
+
+/*
+ * Return 1 if the mantissa overflow bit (bit 60) of @n is set, else 0.
+ * extract64() is used instead of test_bit(): the latter works on arrays of
+ * unsigned long, which is not the same type as uint64_t on every host.
+ */
+static uint64_t get_fdouble_man_of(uint64_t n)
+{
+    return extract64(n, 60, 1);
+}
+
+/* Clear the mantissa overflow bit (bit 60) of *@n. */
+static void clear_fdouble_man_of(uint64_t *n)
+{
+    /* A void function must not "return <expr>;", so just do the update. */
+    *n = deposit64(*n, 60, 1, 0);
+}
+
+/*
+ * Flags word layout used by the accessors below:
+ *   bits 7..19   vexp  (13 bits; bit 12 of the field is TILEGX_F_EXP_DUF)
+ *   bit  20      sign
+ *   bits 21..22  calc  (TILEGX_F_CALC_*)
+ *   bit  23      result is infinity
+ *   bit  24      result is NaN
+ */
+
+/* Return 1 if the NaN flag (bit 24) of @n is set, else 0. */
+static uint32_t get_fdouble_nan(uint64_t n)
+{
+    return extract64(n, 24, 1);
+}
+
+/* Set the NaN flag (bit 24) of *@n. */
+static void set_fdouble_nan(uint64_t *n)
+{
+    *n = deposit64(*n, 24, 1, 1);
+}
+
+/* Return 1 if the infinity flag (bit 23) of @n is set, else 0. */
+static uint32_t get_fdouble_inf(uint64_t n)
+{
+    return extract64(n, 23, 1);
+}
+
+/* Set the infinity flag (bit 23) of *@n. */
+static void set_fdouble_inf(uint64_t *n)
+{
+    *n = deposit64(*n, 23, 1, 1);
+}
+
+/* Return the 2-bit calc field (bits 21..22) of @n: a TILEGX_F_CALC_* value. */
+static uint32_t get_fdouble_calc(uint64_t n)
+{
+    return extract64(n, 21, 2);
+}
+
+/* Store @calc into the 2-bit calc field (bits 21..22) of *@n. */
+static void set_fdouble_calc(uint64_t *n, uint32_t calc)
+{
+    *n = deposit64(*n, 21, 2, calc);
+}
+
+/* Return 1 if the sign flag (bit 20) of @n is set, else 0. */
+static uint32_t get_fdouble_sign(uint64_t n)
+{
+    return extract64(n, 20, 1);
+}
+
+/* Set the sign flag (bit 20) of *@n. */
+static void set_fdouble_sign(uint64_t *n)
+{
+    *n = deposit64(*n, 20, 1, 1);
+}
+
+/* Return the 13-bit vexp field (bits 7..19) of @n. */
+static uint32_t get_fdouble_vexp(uint64_t n)
+{
+    return extract64(n, 7, 13);
+}
+
+/*
+ * Store the low 13 bits of @vexp into the vexp field (bits 7..19) of *@n.
+ * Higher bits of @vexp are discarded by deposit64().
+ */
+static void set_fdouble_vexp(uint64_t *n, uint32_t vexp)
+{
+    *n = deposit64(*n, 7, 13, vexp);
+}
+
+/*
+ * Shift @a right by @count bits, folding every bit that is shifted out into
+ * the least significant bit of the result (a "sticky" bit).  A count of 64
+ * or more yields only the sticky bit.  Callers in this file always pass a
+ * non-negative count (the difference of two exponents, larger minus smaller).
+ */
+static uint64_t shift64RightJamming(uint64_t a, int_fast16_t count)
+{
+    if (count == 0) {
+        return a;
+    } else if (count < 64) {
+        return (a >> count) | ((a << ((0 - count) & 63)) != 0);
+    }
+    return (a != 0);
+}
+
+/*
+ * fdouble_unpack_min: return the working mantissa of whichever operand has
+ * the smaller magnitude, aligned to the larger operand's exponent (shifted
+ * right by the exponent difference, with a sticky bit).
+ *
+ * Magnitude is compared by exponent first, then by fraction.  Returns 0 if
+ * either operand is a NaN or an infinity.
+ */
+uint64_t helper_fdouble_unpack_min(uint64_t srca, uint64_t srcb)
+{
+    /* Convert once, like the *_flags helpers do, instead of passing raw bits */
+    float64 fa = make_float64(srca);
+    float64 fb = make_float64(srcb);
+    uint64_t v = 0;
+    uint32_t expa = get_f64_exp(fa);
+    uint32_t expb = get_f64_exp(fb);
+
+    if (float64_is_any_nan(fa) || float64_is_any_nan(fb)
+        || float64_is_infinity(fa) || float64_is_infinity(fb)) {
+        return 0;
+    } else if (expa > expb) {
+        set_fdouble_man(&v, shift64RightJamming(fr_to_man(fb), expa - expb));
+    } else if (expa < expb) {
+        set_fdouble_man(&v, shift64RightJamming(fr_to_man(fa), expb - expa));
+    } else if (get_f64_man(fa) > get_f64_man(fb)) {
+        set_fdouble_man(&v, fr_to_man(fb));
+    } else {
+        set_fdouble_man(&v, fr_to_man(fa));
+    }
+
+    return v;
+}
+
+/*
+ * fdouble_unpack_max: return the (unshifted) working mantissa of whichever
+ * operand has the larger magnitude, compared by exponent first and then by
+ * fraction.  Returns 0 if either operand is a NaN or an infinity.
+ */
+uint64_t helper_fdouble_unpack_max(uint64_t srca, uint64_t srcb)
+{
+    float64 fa = make_float64(srca);
+    float64 fb = make_float64(srcb);
+    uint64_t v = 0;
+    uint32_t expa = get_f64_exp(fa);
+    uint32_t expb = get_f64_exp(fb);
+
+    if (float64_is_any_nan(fa) || float64_is_any_nan(fb)
+        || float64_is_infinity(fa) || float64_is_infinity(fb)) {
+        return 0;
+    } else if (expa > expb) {
+        set_fdouble_man(&v, fr_to_man(fa));
+    } else if (expa < expb) {
+        set_fdouble_man(&v, fr_to_man(fb));
+    } else if (get_f64_man(fa) > get_f64_man(fb)) {
+        set_fdouble_man(&v, fr_to_man(fa));
+    } else {
+        set_fdouble_man(&v, fr_to_man(fb));
+    }
+
+    return v;
+}
+
+/*
+ * fdouble_addsub: combine two working mantissas.  @srcb is the flags word;
+ * its calc field selects "dest + srca" (TILEGX_F_CALC_ADD) or "dest - srca"
+ * (anything else).
+ */
+uint64_t helper_fdouble_addsub(uint64_t dest, uint64_t srca, uint64_t srcb)
+{
+    if (get_fdouble_calc(srcb) == TILEGX_F_CALC_ADD) {
+        return dest + srca; /* maybe set addsub overflow bit */
+    } else {
+        return dest - srca;
+    }
+}
+
+/*
+ * absolute-add/mul may cause add/mul carry or overflow.
+ *
+ * If the mantissa overflow bit of *@v is set, renormalize: bump the exponent
+ * in *@flags, shift the 128-bit quantity (*@v : *@srcb) right by one bit and
+ * clear the overflow bit.  Returns true when the resulting exponent exceeds
+ * TILEGX_F_EXP_DMAX, i.e. the result no longer fits a finite double.
+ */
+static bool proc_oflow(uint64_t *flags, uint64_t *v, uint64_t *srcb)
+{
+    if (get_fdouble_man_of(*v)) {
+        set_fdouble_vexp(flags, get_fdouble_vexp(*flags) + 1);
+        *srcb >>= 1;
+        *srcb |= *v << 63;      /* lowest bit of v becomes top bit of srcb */
+        *v >>= 1;
+        clear_fdouble_man_of(v);
+    }
+    return get_fdouble_vexp(*flags) > TILEGX_F_EXP_DMAX;
+}
+
+/*
+ * fdouble_pack2: build the final IEEE double.
+ *
+ * @flags: flags word produced by one of the *_flags helpers
+ * @srca:  high part of the result mantissa
+ * @srcb:  low part of the result mantissa
+ *
+ * Returns the default NaN or a signed infinity when the corresponding flag
+ * is set, a signed zero on underflow or zero mantissa, and otherwise the
+ * value produced by normalize_roundpack_float64().
+ */
+uint64_t helper_fdouble_pack2(uint64_t flags, uint64_t srca, uint64_t srcb)
+{
+    DEC_INIT_FPSTATUS; /* presumably declares the local fp_status used below */
+    uint64_t v = srca;
+    float64 d = float64_set_sign(float64_zero, get_fdouble_sign(flags));
+    /* Build the signed infinity with the accessor, not "d | infinity" */
+    float64 inf = float64_set_sign(float64_infinity, get_fdouble_sign(flags));
+
+    if (get_fdouble_nan(flags)) {
+        return float64_val(float64_default_nan);
+    } else if (get_fdouble_inf(flags)) {
+        return float64_val(inf);
+    }
+
+    /* absolute-mul needs left shift 4 + 1 bits to match the real mantissa */
+    if (get_fdouble_calc(flags) == TILEGX_F_CALC_MUL) {
+        v <<= 5;
+        v |= srcb >> 59;
+        srcb <<= 5;
+    }
+    v |= (srcb != 0);   /* keep a sticky bit for whatever remains in srcb */
+
+    /* must check underflow, firstly */
+    if (get_fdouble_vexp(flags) & TILEGX_F_EXP_DUF) {
+        return float64_val(d);
+    }
+
+    if (proc_oflow(&flags, &v, &srcb)) {
+        return float64_val(inf);
+    }
+
+    /*
+     * Shift (v : srcb) left one bit at a time, decrementing the exponent,
+     * until the hidden bit is set or there are no bits left to shift in.
+     */
+    while (!(get_fdouble_man(v) & TILEGX_F_MAN_HBIT)
+           && (get_fdouble_man(v) | srcb)) {
+        set_fdouble_vexp(&flags, get_fdouble_vexp(flags) - 1);
+        set_fdouble_man(&v, get_fdouble_man(v) << 1);
+        set_fdouble_man(&v, get_fdouble_man(v) | (srcb >> 63));
+        srcb <<= 1;
+    }
+
+    /* check underflow, again, after format */
+    if ((get_fdouble_vexp(flags) & TILEGX_F_EXP_DUF) || !get_fdouble_man(v)) {
+        return float64_val(d);
+    }
+
+    return float64_val(normalize_roundpack_float64(get_fdouble_sign(flags),
+                                                   get_fdouble_vexp(flags),
+                                                   get_fdouble_man(v),
+                                                   &fp_status));
+}
+
+/*
+ * Common body of the fdouble_{add,sub,mul}_flags helpers.
+ *
+ * Builds a flags word containing:
+ *  - the comparison bits (eq/ne/lt/le/gt/ge/unordered) of @fsrca vs @fsrcb,
+ *    via the create_fsfd_flag_*() helpers;
+ *  - the sign of calc(@fsrca, @fsrcb);
+ *  - the NaN or infinity flag if that result is a NaN or an infinity;
+ *  - otherwise the result exponent and the TILEGX_F_CALC_* operation that
+ *    fdouble_addsub / fdouble_pack2 must apply to the mantissas.
+ *
+ * @calc must be float64_add, float64_sub or float64_mul; any function other
+ * than add/sub is treated as a multiplication.
+ */
+static uint64_t main_calc(float64 fsrca, float64 fsrcb,
+                          float64 (*calc)(float64, float64, float_status*))
+{
+    DEC_INIT_FPSTATUS; /* presumably declares the local fp_status used below */
+    float64 d;
+    uint64_t flags = 0;
+    uint32_t expa = get_f64_exp(fsrca);
+    uint32_t expb = get_f64_exp(fsrcb);
+
+    if (float64_eq(fsrca, fsrcb, &fp_status)) {
+        flags |= create_fsfd_flag_eq();
+    } else {
+        flags |= create_fsfd_flag_ne();
+    }
+
+    if (float64_lt(fsrca, fsrcb, &fp_status)) {
+        flags |= create_fsfd_flag_lt();
+    }
+    if (float64_le(fsrca, fsrcb, &fp_status)) {
+        flags |= create_fsfd_flag_le();
+    }
+
+    if (float64_lt(fsrcb, fsrca, &fp_status)) {
+        flags |= create_fsfd_flag_gt();
+    }
+    if (float64_le(fsrcb, fsrca, &fp_status)) {
+        flags |= create_fsfd_flag_ge();
+    }
+
+    if (float64_unordered(fsrca, fsrcb, &fp_status)) {
+        flags |= create_fsfd_flag_un();
+    }
+
+    /* Only the sign and the NaN/infinity class of the real result are used */
+    d = calc(fsrca, fsrcb, &fp_status);
+    if (float64_is_neg(d)) {
+        set_fdouble_sign(&flags);
+    }
+
+    if (float64_is_any_nan(d)) {
+        set_fdouble_nan(&flags);
+    } else if (float64_is_infinity(d)) {
+        set_fdouble_inf(&flags);
+    } else if (calc == float64_add) {
+        /* Same signs: magnitudes add; different signs: they subtract */
+        set_fdouble_vexp(&flags, (expa > expb) ? expa : expb);
+        set_fdouble_calc(&flags,
+                         (float64_is_neg(fsrca) == float64_is_neg(fsrcb))
+                             ? TILEGX_F_CALC_ADD : TILEGX_F_CALC_SUB);
+
+    } else if (calc == float64_sub) {
+        /* Subtraction flips the second sign, so the test is inverted */
+        set_fdouble_vexp(&flags, (expa > expb) ? expa : expb);
+        set_fdouble_calc(&flags,
+                         (float64_is_neg(fsrca) != float64_is_neg(fsrcb))
+                             ? TILEGX_F_CALC_ADD : TILEGX_F_CALC_SUB);
+
+    } else {
+        /*
+         * Product exponent: (expa - bias) + (expb - bias) + bias.  Computed
+         * in signed arithmetic so a negative value is really negative; when
+         * stored into the 13-bit vexp field a small negative value sets
+         * bit 12, which is TILEGX_F_EXP_DUF (underflow).
+         */
+        int vexp = (int)expa + (int)expb - TILEGX_F_EXP_DZERO;
+
+        set_fdouble_vexp(&flags, vexp);
+        set_fdouble_calc(&flags, TILEGX_F_CALC_MUL);
+    }
+
+    return flags;
+}
+
+/* fdouble_add_flags: flags word for the addition srca + srcb. */
+uint64_t helper_fdouble_add_flags(uint64_t srca, uint64_t srcb)
+{
+    return main_calc(make_float64(srca), make_float64(srcb), float64_add);
+}
+
+/* fdouble_sub_flags: flags word for the subtraction srca - srcb. */
+uint64_t helper_fdouble_sub_flags(uint64_t srca, uint64_t srcb)
+{
+    return main_calc(make_float64(srca), make_float64(srcb), float64_sub);
+}
+
+/* fdouble_mul_flags: flags word for the multiplication srca * srcb. */
+uint64_t helper_fdouble_mul_flags(uint64_t srca, uint64_t srcb)
+{
+    return main_calc(make_float64(srca), make_float64(srcb), float64_mul);
+}