Message ID | Pine.LNX.4.64.1310082337080.23637@digraph.polyomino.org.uk (mailing list archive) |
---|---|
State | Accepted, archived |
Commit | 28fbf1d540920ad6722fa6ac15237a307932bc9b |
Headers | show |
On Tue, 8 Oct 2013, Joseph S. Myers wrote: > I'll send as a followup the testcase I used for verifying that the > instructions (other than the theoretical conversions to 64-bit > integers) produce the correct results. In addition, this has been > tested with the glibc testsuite (with the e500 port as posted at > <https://sourceware.org/ml/libc-alpha/2013-10/msg00195.html>, where it > improves the libm test results. Here is that testcase. #include <stdio.h> #include <stdlib.h> #define INFF __builtin_inff () #define INFD __builtin_inf () #define NANF __builtin_nanf ("") #define NAND __builtin_nan ("") /* e500 rounding modes: 0 = nearest, 1 = zero, 2 = up, 3 = down. */ static inline void set_rm (unsigned int mode) { unsigned int spefscr; asm volatile ("mfspefscr %0" : "=r" (spefscr)); spefscr = (spefscr & ~3) | mode; asm volatile ("mtspefscr %0" : : "r" (spefscr)); } static int success_count, failure_count; struct float_test_data { float input; unsigned int expected[4]; }; struct double_test_data { double input; unsigned int expected[4]; }; typedef float vfloat __attribute__ ((vector_size (8))); typedef unsigned int vuint __attribute__ ((vector_size (8))); union vfloat_union { vfloat vf; float f[2]; }; union vuint_union { vuint vui; unsigned int ui[2]; }; #define T(A, B, C, D, E) { (A), { (B), (C), (D), (E) } } #define TZ(A, B) T (A, B, B, B, B) static void check_result (const char *insn, double input, unsigned int rm, unsigned int expected, unsigned int res) { if (res == expected) success_count++; else { failure_count++; printf ("%s %a mode %u expected 0x%x (%d) got 0x%x (%d)\n", insn, input, rm, expected, (int) expected, res, (int) res); } } #define RUN_FLOAT_TESTS(INSN) \ static void \ test_##INSN (void) \ { \ size_t i; \ for (i = 0; \ i < sizeof (INSN##_test_data) / sizeof (INSN##_test_data[0]); \ i++) \ { \ unsigned int rm; \ for (rm = 0; rm <= 3; rm++) \ { \ set_rm (rm); \ unsigned int res; \ asm volatile (#INSN " %0, %1" \ : "=&r" (res) \ : "r" (INSN##_test_data[i].input)); \ check_result (#INSN, INSN##_test_data[i].input, rm, \ INSN##_test_data[i].expected[rm], res); \ } \ } \ } #define RUN_VFLOAT_TESTS(INSN, TINSN) \ static void \ test_##INSN (void) \ { \ size_t i; \ for (i = 0; \ i < sizeof (TINSN##_test_data) / sizeof (TINSN##_test_data[0]); \ i++) \ { \ unsigned int rm; \ for (rm = 0; rm <= 3; rm++) \ { \ set_rm (rm); \ union vfloat_union varg; \ union vuint_union vres; \ varg.f[0] = TINSN##_test_data[i].input; \ varg.f[1] = 0; \ asm volatile (#INSN " %0, %1" \ : "=&r" (vres.vui) \ : "r" (varg.vf)); \ check_result (#INSN " (high)", TINSN##_test_data[i].input, \ rm, TINSN##_test_data[i].expected[rm], \ vres.ui[0]); \ check_result (#INSN " (low 0)", TINSN##_test_data[i].input, \ rm, 0, vres.ui[1]); \ varg.f[1] = TINSN##_test_data[i].input; \ varg.f[0] = 0; \ asm volatile (#INSN " %0, %1" \ : "=&r" (vres.vui) \ : "r" (varg.vf)); \ check_result (#INSN " (low)", TINSN##_test_data[i].input, \ rm, TINSN##_test_data[i].expected[rm], \ vres.ui[1]); \ check_result (#INSN " (high 0)", TINSN##_test_data[i].input, \ rm, 0, vres.ui[0]); \ } \ } \ } static const struct float_test_data efsctsiz_test_data[] = { TZ (NANF, 0), TZ (INFF, 0x7fffffff), TZ (0x1.fffffep127f, 0x7fffffff), TZ (0x1p31f, 0x7fffffff), TZ (0x1.fffffep30f, 0x7fffff80), TZ (1.6f, 1), TZ (1.5f, 1), TZ (1.4f, 1), TZ (1.0f, 1), TZ (0.6f, 0), TZ (0.5f, 0), TZ (0.4f, 0), TZ (0x1p-149f, 0), TZ (0.0f, 0), TZ (-0.0f, 0), TZ (-0x1p-149f, 0), TZ (-0.4f, 0), TZ (-0.5f, 0), TZ (-0.6f, 0), TZ (-1.0f, -1), TZ (-1.4f, -1), TZ (-1.5f, -1), TZ (-1.6f, -1), TZ (-0x1.fffffep30f, 0x80000080), TZ (-0x1p31f, 0x80000000), TZ (-0x1.fffffep127f, 0x80000000), TZ (-INFF, 0x80000000), TZ (-NANF, 0), }; static const struct float_test_data efsctuiz_test_data[] = { TZ (NANF, 0), TZ (INFF, 0xffffffff), TZ (0x1.fffffep127f, 0xffffffff), TZ (0x1p32f, 0xffffffff), TZ (0x1.fffffep31f, 0xffffff00), TZ (1.6f, 1), TZ (1.5f, 1), TZ (1.4f, 1), TZ (1.0f, 1), TZ (0.6f, 0), TZ (0.5f, 0), TZ (0.4f, 0), TZ (0x1p-149f, 0), TZ (0.0f, 0), TZ (-0.0f, 0), TZ (-0x1p-149f, 0), TZ (-0.4f, 0), TZ (-0.5f, 0), TZ (-0.6f, 0), TZ (-1.0f, 0), TZ (-1.4f, 0), TZ (-1.5f, 0), TZ (-1.6f, 0), TZ (-0x1.fffffep127f, 0), TZ (-INFF, 0), TZ (-NANF, 0), }; static const struct double_test_data efdctsiz_test_data[] = { TZ (NAND, 0), TZ (INFD, 0x7fffffff), TZ (0x1.fffffffffffffp1023, 0x7fffffff), TZ (0x1.0000000000001p31, 0x7fffffff), TZ (0x1p31, 0x7fffffff), TZ (0x1.fffffffffffffp30, 0x7fffffff), TZ (0x1.fffffffcp30, 0x7fffffff), TZ (1.6, 1), TZ (1.5, 1), TZ (1.4, 1), TZ (1.0, 1), TZ (0.6, 0), TZ (0.5, 0), TZ (0.4, 0), TZ (0x1p-1074, 0), TZ (0.0, 0), TZ (-0.0, 0), TZ (-0x1p-1074, 0), TZ (-0.4, 0), TZ (-0.5, 0), TZ (-0.6, 0), TZ (-1.0, -1), TZ (-1.4, -1), TZ (-1.5, -1), TZ (-1.6, -1), TZ (-0x1.fffffffcp30, 0x80000001), TZ (-0x1.fffffffffffffp30, 0x80000001), TZ (-0x1p31, 0x80000000), TZ (-0x1.0000000000001p31, 0x80000000), TZ (-0x1.fffffffffffffp1023, 0x80000000), TZ (-INFD, 0x80000000), TZ (-NAND, 0), }; static const struct double_test_data efdctuiz_test_data[] = { TZ (NAND, 0), TZ (INFD, 0xffffffff), TZ (0x1.fffffffffffffp1023, 0xffffffff), TZ (0x1.0000000000001p32, 0xffffffff), TZ (0x1p32, 0xffffffff), TZ (0x1.fffffffffffffp31, 0xffffffff), TZ (1.6, 1), TZ (1.5, 1), TZ (1.4, 1), TZ (1.0, 1), TZ (0.6, 0), TZ (0.5, 0), TZ (0.4, 0), TZ (0x1p-1074, 0), TZ (0.0, 0), TZ (-0.0, 0), TZ (-0x1p-1074, 0), TZ (-0.4, 0), TZ (-0.5, 0), TZ (-0.6, 0), TZ (-1.0, 0), TZ (-1.4, 0), TZ (-1.5, 0), TZ (-1.6, 0), TZ (-0x1.fffffffffffffp1023, 0), TZ (-INFD, 0), TZ (-NAND, 0), }; static const struct float_test_data efsctsi_test_data[] = { TZ (NANF, 0), TZ (INFF, 0x7fffffff), TZ (0x1.fffffep127f, 0x7fffffff), TZ (0x1p31f, 0x7fffffff), TZ (0x1.fffffep30f, 0x7fffff80), T (1.6f, 2, 1, 2, 1), T (1.5f, 2, 1, 2, 1), T (1.4f, 1, 1, 2, 1), TZ (1.0f, 1), T (0.6f, 1, 0, 1, 0), T (0.5f, 0, 0, 1, 0), T (0.4f, 0, 0, 1, 0), T (0x1p-149f, 0, 0, 1, 0), TZ (0.0f, 0), TZ (-0.0f, 0), T (-0x1p-149f, 0, 0, 0, -1), T (-0.4f, 0, 0, 0, -1), T (-0.5f, 0, 0, 0, -1), T (-0.6f, -1, 0, 0, -1), TZ (-1.0f, -1), T (-1.4f, -1, -1, -1, -2), T (-1.5f, -2, -1, -1, -2), T (-1.6f, -2, -1, -1, -2), TZ (-0x1.fffffep30f, 0x80000080), TZ (-0x1p31f, 0x80000000), TZ (-0x1.fffffep127f, 0x80000000), TZ (-INFF, 0x80000000), TZ (-NANF, 0), }; static const struct float_test_data efsctui_test_data[] = { TZ (NANF, 0), TZ (INFF, 0xffffffff), TZ (0x1.fffffep127f, 0xffffffff), TZ (0x1p32f, 0xffffffff), TZ (0x1.fffffep31f, 0xffffff00), T (1.6f, 2, 1, 2, 1), T (1.5f, 2, 1, 2, 1), T (1.4f, 1, 1, 2, 1), TZ (1.0f, 1), T (0.6f, 1, 0, 1, 0), T (0.5f, 0, 0, 1, 0), T (0.4f, 0, 0, 1, 0), T (0x1p-149f, 0, 0, 1, 0), TZ (0.0f, 0), TZ (-0.0f, 0), TZ (-0x1p-149f, 0), TZ (-0.4f, 0), TZ (-0.5f, 0), TZ (-0.6f, 0), TZ (-1.0f, 0), TZ (-1.4f, 0), TZ (-1.5f, 0), TZ (-1.6f, 0), TZ (-0x1.fffffep127f, 0), TZ (-INFF, 0), TZ (-NANF, 0), }; static const struct double_test_data efdctsi_test_data[] = { TZ (NAND, 0), TZ (INFD, 0x7fffffff), TZ (0x1.fffffffffffffp1023, 0x7fffffff), TZ (0x1.0000000000001p31, 0x7fffffff), TZ (0x1p31, 0x7fffffff), TZ (0x1.fffffffffffffp30, 0x7fffffff), TZ (0x1.fffffffcp30, 0x7fffffff), T (1.6, 2, 1, 2, 1), T (1.5, 2, 1, 2, 1), T (1.4, 1, 1, 2, 1), TZ (1.0, 1), T (0.6, 1, 0, 1, 0), T (0.5, 0, 0, 1, 0), T (0.4, 0, 0, 1, 0), T (0x1p-1074, 0, 0, 1, 0), TZ (0.0, 0), TZ (-0.0, 0), T (-0x1p-1074, 0, 0, 0, -1), T (-0.4, 0, 0, 0, -1), T (-0.5, 0, 0, 0, -1), T (-0.6, -1, 0, 0, -1), TZ (-1.0, -1), T (-1.4, -1, -1, -1, -2), T (-1.5, -2, -1, -1, -2), T (-1.6, -2, -1, -1, -2), TZ (-0x1.fffffffcp30, 0x80000001), T (-0x1.fffffffffffffp30, 0x80000000, 0x80000001, 0x80000001, 0x80000000), TZ (-0x1p31, 0x80000000), TZ (-0x1.0000000000001p31, 0x80000000), TZ (-0x1.fffffffffffffp1023, 0x80000000), TZ (-INFD, 0x80000000), TZ (-NAND, 0), }; static const struct double_test_data efdctui_test_data[] = { TZ (NAND, 0), TZ (INFD, 0xffffffff), TZ (0x1.fffffffffffffp1023, 0xffffffff), TZ (0x1.0000000000001p32, 0xffffffff), TZ (0x1p32, 0xffffffff), TZ (0x1.fffffffffffffp31, 0xffffffff), T (1.6, 2, 1, 2, 1), T (1.5, 2, 1, 2, 1), T (1.4, 1, 1, 2, 1), TZ (1.0, 1), T (0.6, 1, 0, 1, 0), T (0.5, 0, 0, 1, 0), T (0.4, 0, 0, 1, 0), T (0x1p-1074, 0, 0, 1, 0), TZ (0.0, 0), TZ (-0.0, 0), TZ (-0x1p-1074, 0), TZ (-0.4, 0), TZ (-0.5, 0), TZ (-0.6, 0), TZ (-1.0, 0), TZ (-1.4, 0), TZ (-1.5, 0), TZ (-1.6, 0), TZ (-0x1.fffffffffffffp1023, 0), TZ (-INFD, 0), TZ (-NAND, 0), }; static const struct float_test_data efsctsf_test_data[] = { TZ (NANF, 0), TZ (INFF, 0x7fffffff), TZ (0x1.fffffep127f, 0x7fffffff), TZ (0x1.000002p0f, 0x7fffffff), TZ (1.0f, 0x7fffffff), TZ (0x1.fffffep-1f, 0x7fffff80), TZ (0xffffff.0p-31f, 0xffffff), T (0x7fffff.8p-31f, 0x800000, 0x7fffff, 0x800000, 0x7fffff), T (0x7ffffe.8p-31f, 0x7ffffe, 0x7ffffe, 0x7fffff, 0x7ffffe), T (0x1.9p-31f, 2, 1, 2, 1), T (0x1.8p-31f, 2, 1, 2, 1), T (0x1.7p-31f, 1, 1, 2, 1), TZ (0x1p-31f, 1), T (0x0.9p-31f, 1, 0, 1, 0), T (0x0.8p-31f, 0, 0, 1, 0), T (0x0.7p-31f, 0, 0, 1, 0), T (0x1p-149f, 0, 0, 1, 0), TZ (0.0f, 0), TZ (-0.0f, 0), T (-0x1p-149f, 0, 0, 0, -1), T (-0x0.7p-31f, 0, 0, 0, -1), T (-0x0.8p-31f, 0, 0, 0, -1), T (-0x0.9p-31f, -1, 0, 0, -1), TZ (-0x1p-31f, -1), T (-0x1.7p-31f, -1, -1, -1, -2), T (-0x1.8p-31f, -2, -1, -1, -2), T (-0x1.9p-31f, -2, -1, -1, -2), T (-0x7ffffe.8p-31f, -0x7ffffe, -0x7ffffe, -0x7ffffe, -0x7fffff), T (-0x7fffff.8p-31f, -0x800000, -0x7fffff, -0x7fffff, -0x800000), TZ (-0xffffff.0p-31f, -0xffffff), TZ (-0x1.fffffep-1f, -0x7fffff80), TZ (-1.0f, 0x80000000), TZ (-0x1.000002p0f, 0x80000000), TZ (-0x1.fffffep127f, 0x80000000), TZ (-INFF, 0x80000000), TZ (-NANF, 0), }; static const struct float_test_data efsctuf_test_data[] = { TZ (NANF, 0), TZ (INFF, 0xffffffff), TZ (0x1.fffffep127f, 0xffffffff), TZ (0x1.000002p0f, 0xffffffff), TZ (1.0f, 0xffffffff), TZ (0x1.fffffep-1f, 0xffffff00), TZ (0xffffff.0p-32f, 0xffffff), T (0x7fffff.8p-32f, 0x800000, 0x7fffff, 0x800000, 0x7fffff), T (0x7ffffe.8p-32f, 0x7ffffe, 0x7ffffe, 0x7fffff, 0x7ffffe), T (0x1.9p-32f, 2, 1, 2, 1), T (0x1.8p-32f, 2, 1, 2, 1), T (0x1.7p-32f, 1, 1, 2, 1), TZ (0x1p-32f, 1), T (0x0.9p-32f, 1, 0, 1, 0), T (0x0.8p-32f, 0, 0, 1, 0), T (0x0.7p-32f, 0, 0, 1, 0), T (0x1p-149f, 0, 0, 1, 0), TZ (0.0f, 0), TZ (-0.0f, 0), TZ (-0x1p-149f, 0), TZ (-0x0.7p-32f, 0), TZ (-0x0.8p-32f, 0), TZ (-0x0.9p-32f, 0), TZ (-0x1p-32f, 0), TZ (-0x1.7p-32f, 0), TZ (-0x1.8p-32f, 0), TZ (-0x1.9p-32f, 0), TZ (-0x7ffffe.8p-32f, 0), TZ (-0x7fffff.8p-32f, 0), TZ (-0xffffff.0p-32f, 0), TZ (-0x1.fffffep-1f, 0), TZ (-1.0f, 0), TZ (-0x1.000002p0f, 0), TZ (-0x1.fffffep127f, 0), TZ (-INFF, 0), TZ (-NANF, 0), }; static const struct double_test_data efdctsf_test_data[] = { TZ (NAND, 0), TZ (INFD, 0x7fffffff), TZ (0x1.fffffffffffffp1023, 0x7fffffff), TZ (0x1.0000000000001p0, 0x7fffffff), TZ (1.0, 0x7fffffff), TZ (0x7fffffffp-31, 0x7fffffff), T (0x7fffff.8p-31, 0x800000, 0x7fffff, 0x800000, 0x7fffff), T (0x7ffffe.8p-31, 0x7ffffe, 0x7ffffe, 0x7fffff, 0x7ffffe), T (0x1.9p-31, 2, 1, 2, 1), T (0x1.8p-31, 2, 1, 2, 1), T (0x1.7p-31, 1, 1, 2, 1), TZ (0x1p-31, 1), T (0x0.9p-31, 1, 0, 1, 0), T (0x0.8p-31, 0, 0, 1, 0), T (0x0.7p-31, 0, 0, 1, 0), T (0x1p-1074, 0, 0, 1, 0), TZ (0.0, 0), TZ (-0.0, 0), T (-0x1p-1074, 0, 0, 0, -1), T (-0x0.7p-31, 0, 0, 0, -1), T (-0x0.8p-31, 0, 0, 0, -1), T (-0x0.9p-31, -1, 0, 0, -1), TZ (-0x1p-31, -1), T (-0x1.7p-31, -1, -1, -1, -2), T (-0x1.8p-31, -2, -1, -1, -2), T (-0x1.9p-31, -2, -1, -1, -2), T (-0x7ffffe.8p-31, -0x7ffffe, -0x7ffffe, -0x7ffffe, -0x7fffff), T (-0x7fffff.8p-31, -0x800000, -0x7fffff, -0x7fffff, -0x800000), TZ (-0x7fffffffp-31, -0x7fffffff), TZ (-1.0, 0x80000000), TZ (-0x1.0000000000001p0, 0x80000000), TZ (-0x1.fffffffffffffp1023, 0x80000000), TZ (-INFD, 0x80000000), TZ (-NAND, 0), }; static const struct double_test_data efdctuf_test_data[] = { TZ (NAND, 0), TZ (INFD, 0xffffffff), TZ (0x1.fffffffffffffp1023, 0xffffffff), TZ (0x1.0000000000001p0, 0xffffffff), TZ (1.0, 0xffffffff), TZ (0xffffffffp-32, 0xffffffff), T (0xfffffffe.9p-32, 0xffffffff, 0xfffffffe, 0xffffffff, 0xfffffffe), T (0xfffffffe.8p-32, 0xfffffffe, 0xfffffffe, 0xffffffff, 0xfffffffe), T (0xfffffffe.7p-32, 0xfffffffe, 0xfffffffe, 0xffffffff, 0xfffffffe), T (0xfffffffd.9p-32, 0xfffffffe, 0xfffffffd, 0xfffffffe, 0xfffffffd), T (0xfffffffd.8p-32, 0xfffffffe, 0xfffffffd, 0xfffffffe, 0xfffffffd), T (0xfffffffd.7p-32, 0xfffffffd, 0xfffffffd, 0xfffffffe, 0xfffffffd), T (0x7fffff.8p-32, 0x800000, 0x7fffff, 0x800000, 0x7fffff), T (0x7ffffe.8p-32, 0x7ffffe, 0x7ffffe, 0x7fffff, 0x7ffffe), T (0x1.9p-32, 2, 1, 2, 1), T (0x1.8p-32, 2, 1, 2, 1), T (0x1.7p-32, 1, 1, 2, 1), TZ (0x1p-32, 1), T (0x0.9p-32, 1, 0, 1, 0), T (0x0.8p-32, 0, 0, 1, 0), T (0x0.7p-32, 0, 0, 1, 0), T (0x1p-1074, 0, 0, 1, 0), TZ (0.0, 0), TZ (-0.0, 0), TZ (-0x1p-1074, 0), TZ (-0x0.7p-32, 0), TZ (-0x0.8p-32, 0), TZ (-0x0.9p-32, 0), TZ (-0x1p-32, 0), TZ (-0x1.7p-32, 0), TZ (-0x1.8p-32, 0), TZ (-0x1.9p-32, 0), TZ (-0x7ffffe.8p-32, 0), TZ (-0x7fffff.8p-32, 0), TZ (-0xfffffffd.7p-32, 0), TZ (-0xfffffffd.8p-32, 0), TZ (-0xfffffffd.9p-32, 0), TZ (-0xfffffffe.7p-32, 0), TZ (-0xfffffffe.8p-32, 0), TZ (-0xfffffffe.9p-32, 0), TZ (-0xffffffffp-32, 0), TZ (-1.0, 0), TZ (-0x1.0000000000001p0, 0), TZ (-0x1.fffffffffffffp1023, 0), TZ (-INFD, 0), TZ (-NAND, 0), }; RUN_FLOAT_TESTS (efsctsiz) RUN_VFLOAT_TESTS (evfsctsiz, efsctsiz) RUN_FLOAT_TESTS (efsctuiz) RUN_VFLOAT_TESTS (evfsctuiz, efsctuiz) RUN_FLOAT_TESTS (efdctsiz) RUN_FLOAT_TESTS (efdctuiz) RUN_FLOAT_TESTS (efsctsi) RUN_VFLOAT_TESTS (evfsctsi, efsctsi) RUN_FLOAT_TESTS (efsctui) RUN_VFLOAT_TESTS (evfsctui, efsctui) RUN_FLOAT_TESTS (efdctsi) RUN_FLOAT_TESTS (efdctui) RUN_FLOAT_TESTS (efsctsf) RUN_VFLOAT_TESTS (evfsctsf, efsctsf) RUN_FLOAT_TESTS (efsctuf) RUN_VFLOAT_TESTS (evfsctuf, efsctuf) RUN_FLOAT_TESTS (efdctsf) RUN_FLOAT_TESTS (efdctuf) int main (void) { test_efsctsiz (); test_evfsctsiz (); test_efsctuiz (); test_evfsctuiz (); test_efdctsiz (); test_efdctuiz (); test_efsctsi (); test_evfsctsi (); test_efsctui (); test_evfsctui (); test_efdctsi (); test_efdctui (); test_efsctsf (); test_evfsctsf (); test_efsctuf (); test_evfsctuf (); test_efdctsf (); test_efdctuf (); printf ("%d tests passed, %d tests failed\n", success_count, failure_count); exit (failure_count != 0 ? EXIT_FAILURE : EXIT_SUCCESS); }
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index ecdf35d..01a0abb 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -275,21 +275,13 @@ int do_spe_mathemu(struct pt_regs *regs) case EFSCTSF: case EFSCTUF: - if (!((vb.wp[1] >> 23) == 0xff && ((vb.wp[1] & 0x7fffff) > 0))) { - /* NaN */ - if (((vb.wp[1] >> 23) & 0xff) == 0) { - /* denorm */ - vc.wp[1] = 0x0; - } else if ((vb.wp[1] >> 31) == 0) { - /* positive normal */ - vc.wp[1] = (func == EFSCTSF) ? - 0x7fffffff : 0xffffffff; - } else { /* negative normal */ - vc.wp[1] = (func == EFSCTSF) ? - 0x80000000 : 0x0; - } - } else { /* rB is NaN */ - vc.wp[1] = 0x0; + if (SB_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + SB_e += (func == EFSCTSF ? 31 : 32); + FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, + (func == EFSCTSF)); } goto update_regs; @@ -306,16 +298,25 @@ int do_spe_mathemu(struct pt_regs *regs) } case EFSCTSI: - case EFSCTSIZ: case EFSCTUI: + if (SB_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, + ((func & 0x3) != 0)); + } + goto update_regs; + + case EFSCTSIZ: case EFSCTUIZ: - if (func & 0x4) { - _FP_ROUND(1, SB); + if (SB_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); } else { - _FP_ROUND_ZERO(1, SB); + FP_TO_INT_S(vc.wp[1], SB, 32, + ((func & 0x3) != 0)); } - FP_TO_INT_S(vc.wp[1], SB, 32, - (((func & 0x3) != 0) || SB_s)); goto update_regs; default: @@ -404,22 +405,13 @@ cmp_s: case EFDCTSF: case EFDCTUF: - if (!((vb.wp[0] >> 20) == 0x7ff && - ((vb.wp[0] & 0xfffff) > 0 || (vb.wp[1] > 0)))) { - /* not a NaN */ - if (((vb.wp[0] >> 20) & 0x7ff) == 0) { - /* denorm */ - vc.wp[1] = 0x0; - } else if ((vb.wp[0] >> 31) == 0) { - /* positive normal */ - vc.wp[1] = (func == EFDCTSF) ? - 0x7fffffff : 0xffffffff; - } else { /* negative normal */ - vc.wp[1] = (func == EFDCTSF) ? - 0x80000000 : 0x0; - } - } else { /* NaN */ - vc.wp[1] = 0x0; + if (DB_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + DB_e += (func == EFDCTSF ? 31 : 32); + FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, + (func == EFDCTSF)); } goto update_regs; @@ -437,21 +429,35 @@ cmp_s: case EFDCTUIDZ: case EFDCTSIDZ: - _FP_ROUND_ZERO(2, DB); - FP_TO_INT_D(vc.dp[0], DB, 64, ((func & 0x1) == 0)); + if (DB_c == FP_CLS_NAN) { + vc.dp[0] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + FP_TO_INT_D(vc.dp[0], DB, 64, + ((func & 0x1) == 0)); + } goto update_regs; case EFDCTUI: case EFDCTSI: + if (DB_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, + ((func & 0x3) != 0)); + } + goto update_regs; + case EFDCTUIZ: case EFDCTSIZ: - if (func & 0x4) { - _FP_ROUND(2, DB); + if (DB_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); } else { - _FP_ROUND_ZERO(2, DB); + FP_TO_INT_D(vc.wp[1], DB, 32, + ((func & 0x3) != 0)); } - FP_TO_INT_D(vc.wp[1], DB, 32, - (((func & 0x3) != 0) || DB_s)); goto update_regs; default: @@ -556,37 +562,60 @@ cmp_d: cmp = -1; goto cmp_vs; - case EVFSCTSF: - __asm__ __volatile__ ("mtspr 512, %4\n" - "efsctsf %0, %2\n" - "efsctsf %1, %3\n" - : "=r" (vc.wp[0]), "=r" (vc.wp[1]) - : "r" (vb.wp[0]), "r" (vb.wp[1]), "r" (0)); - goto update_regs; - case EVFSCTUF: - __asm__ __volatile__ ("mtspr 512, %4\n" - "efsctuf %0, %2\n" - "efsctuf %1, %3\n" - : "=r" (vc.wp[0]), "=r" (vc.wp[1]) - : "r" (vb.wp[0]), "r" (vb.wp[1]), "r" (0)); + case EVFSCTSF: + if (SB0_c == FP_CLS_NAN) { + vc.wp[0] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + SB0_e += (func == EVFSCTSF ? 31 : 32); + FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, + (func == EVFSCTSF)); + } + if (SB1_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + SB1_e += (func == EVFSCTSF ? 31 : 32); + FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, + (func == EVFSCTSF)); + } goto update_regs; case EVFSCTUI: case EVFSCTSI: + if (SB0_c == FP_CLS_NAN) { + vc.wp[0] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, + ((func & 0x3) != 0)); + } + if (SB1_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, + ((func & 0x3) != 0)); + } + goto update_regs; + case EVFSCTUIZ: case EVFSCTSIZ: - if (func & 0x4) { - _FP_ROUND(1, SB0); - _FP_ROUND(1, SB1); + if (SB0_c == FP_CLS_NAN) { + vc.wp[0] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); } else { - _FP_ROUND_ZERO(1, SB0); - _FP_ROUND_ZERO(1, SB1); + FP_TO_INT_S(vc.wp[0], SB0, 32, + ((func & 0x3) != 0)); + } + if (SB1_c == FP_CLS_NAN) { + vc.wp[1] = 0; + FP_SET_EXCEPTION(FP_EX_INVALID); + } else { + FP_TO_INT_S(vc.wp[1], SB1, 32, + ((func & 0x3) != 0)); } - FP_TO_INT_S(vc.wp[0], SB0, 32, - (((func & 0x3) != 0) || SB0_s)); - FP_TO_INT_S(vc.wp[1], SB1, 32, - (((func & 0x3) != 0) || SB1_s)); goto update_regs; default: @@ -681,14 +710,16 @@ int speround_handler(struct pt_regs *regs) union dw_union fgpr; int s_lo, s_hi; int lo_inexact, hi_inexact; - unsigned long speinsn, type, fc, fptype; + int fp_result; + unsigned long speinsn, type, fb, fc, fptype, func; if (get_user(speinsn, (unsigned int __user *) regs->nip)) return -EFAULT; if ((speinsn >> 26) != 4) return -EINVAL; /* not an spe instruction */ - type = insn_type(speinsn & 0x7ff); + func = speinsn & 0x7ff; + type = insn_type(func); if (type == XCR) return -ENOSYS; __FPU_FPSCR = mfspr(SPRN_SPEFSCR); @@ -708,6 +739,65 @@ int speround_handler(struct pt_regs *regs) fgpr.wp[0] = current->thread.evr[fc]; fgpr.wp[1] = regs->gpr[fc]; + fb = (speinsn >> 11) & 0x1f; + switch (func) { + case EFSCTUIZ: + case EFSCTSIZ: + case EVFSCTUIZ: + case EVFSCTSIZ: + case EFDCTUIDZ: + case EFDCTSIDZ: + case EFDCTUIZ: + case EFDCTSIZ: + /* + * These instructions always round to zero, + * independent of the rounding mode. + */ + return 0; + + case EFSCTUI: + case EFSCTUF: + case EVFSCTUI: + case EVFSCTUF: + case EFDCTUI: + case EFDCTUF: + fp_result = 0; + s_lo = 0; + s_hi = 0; + break; + + case EFSCTSI: + case EFSCTSF: + fp_result = 0; + /* Recover the sign of a zero result if possible. */ + if (fgpr.wp[1] == 0) + s_lo = regs->gpr[fb] & SIGN_BIT_S; + break; + + case EVFSCTSI: + case EVFSCTSF: + fp_result = 0; + /* Recover the sign of a zero result if possible. */ + if (fgpr.wp[1] == 0) + s_lo = regs->gpr[fb] & SIGN_BIT_S; + if (fgpr.wp[0] == 0) + s_hi = current->thread.evr[fb] & SIGN_BIT_S; + break; + + case EFDCTSI: + case EFDCTSF: + fp_result = 0; + s_hi = s_lo; + /* Recover the sign of a zero result if possible. */ + if (fgpr.wp[1] == 0) + s_hi = current->thread.evr[fb] & SIGN_BIT_S; + break; + + default: + fp_result = 1; + break; + } + pr_debug("round fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]); switch (fptype) { @@ -719,15 +809,30 @@ int speround_handler(struct pt_regs *regs) if ((FP_ROUNDMODE) == FP_RND_PINF) { if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */ } else { /* round to -Inf */ - if (s_lo) fgpr.wp[1]++; /* Z < 0, choose Z2 */ + if (s_lo) { + if (fp_result) + fgpr.wp[1]++; /* Z < 0, choose Z2 */ + else + fgpr.wp[1]--; /* Z < 0, choose Z2 */ + } } break; case DPFP: if (FP_ROUNDMODE == FP_RND_PINF) { - if (!s_hi) fgpr.dp[0]++; /* Z > 0, choose Z1 */ + if (!s_hi) { + if (fp_result) + fgpr.dp[0]++; /* Z > 0, choose Z1 */ + else + fgpr.wp[1]++; /* Z > 0, choose Z1 */ + } } else { /* round to -Inf */ - if (s_hi) fgpr.dp[0]++; /* Z < 0, choose Z2 */ + if (s_hi) { + if (fp_result) + fgpr.dp[0]++; /* Z < 0, choose Z2 */ + else + fgpr.wp[1]--; /* Z < 0, choose Z2 */ + } } break; @@ -738,10 +843,18 @@ int speround_handler(struct pt_regs *regs) if (hi_inexact && !s_hi) fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */ } else { /* round to -Inf */ - if (lo_inexact && s_lo) - fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ - if (hi_inexact && s_hi) - fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ + if (lo_inexact && s_lo) { + if (fp_result) + fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ + else + fgpr.wp[1]--; /* Z_low < 0, choose Z2 */ + } + if (hi_inexact && s_hi) { + if (fp_result) + fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ + else + fgpr.wp[0]--; /* Z_high < 0, choose Z2 */ + } } break;