/*
 * IEEE754 floating point arithmetic
 * single precision: MADDF.f (Fused Multiply Add)
 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
 *
 * MIPS floating point support
 * Copyright (C) 2015 Imagination Technologies, Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 * This program is free software; you can distribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */
| |
| #include "ieee754sp.h" |
| |
/* Option bits understood by _sp_maddf(). */
enum maddf_flags {
	/* Flip the sign of the product x * y before it is combined with z. */
	maddf_negate_product = 0x1,
};
| |
| static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, |
| union ieee754sp y, enum maddf_flags flags) |
| { |
| int re; |
| int rs; |
| unsigned rm; |
| unsigned short lxm; |
| unsigned short hxm; |
| unsigned short lym; |
| unsigned short hym; |
| unsigned lrm; |
| unsigned hrm; |
| unsigned t; |
| unsigned at; |
| int s; |
| |
| COMPXSP; |
| COMPYSP; |
| COMPZSP; |
| |
| EXPLODEXSP; |
| EXPLODEYSP; |
| EXPLODEZSP; |
| |
| FLUSHXSP; |
| FLUSHYSP; |
| FLUSHZSP; |
| |
| ieee754_clearcx(); |
| |
| switch (zc) { |
| case IEEE754_CLASS_SNAN: |
| ieee754_setcx(IEEE754_INVALID_OPERATION); |
| return ieee754sp_nanxcpt(z); |
| case IEEE754_CLASS_DNORM: |
| SPDNORMZ; |
| /* QNAN is handled separately below */ |
| } |
| |
| switch (CLPAIR(xc, yc)) { |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): |
| return ieee754sp_nanxcpt(y); |
| |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): |
| return ieee754sp_nanxcpt(x); |
| |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): |
| return y; |
| |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): |
| return x; |
| |
| /* |
| * Infinity handling |
| */ |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| ieee754_setcx(IEEE754_INVALID_OPERATION); |
| return ieee754sp_indef(); |
| |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| return ieee754sp_inf(xs ^ ys); |
| |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): |
| if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| /* Multiplication is 0 so just return z */ |
| return z; |
| |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): |
| SPDNORMX; |
| |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| else if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| SPDNORMY; |
| break; |
| |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| else if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| SPDNORMX; |
| break; |
| |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| else if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| /* fall through to real computations */ |
| } |
| |
| /* Finally get to do some computation */ |
| |
| /* |
| * Do the multiplication bit first |
| * |
| * rm = xm * ym, re = xe + ye basically |
| * |
| * At this point xm and ym should have been normalized. |
| */ |
| |
| /* rm = xm * ym, re = xe+ye basically */ |
| assert(xm & SP_HIDDEN_BIT); |
| assert(ym & SP_HIDDEN_BIT); |
| |
| re = xe + ye; |
| rs = xs ^ ys; |
| if (flags & maddf_negate_product) |
| rs ^= 1; |
| |
| /* shunt to top of word */ |
| xm <<= 32 - (SP_FBITS + 1); |
| ym <<= 32 - (SP_FBITS + 1); |
| |
| /* |
| * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. |
| */ |
| lxm = xm & 0xffff; |
| hxm = xm >> 16; |
| lym = ym & 0xffff; |
| hym = ym >> 16; |
| |
| lrm = lxm * lym; /* 16 * 16 => 32 */ |
| hrm = hxm * hym; /* 16 * 16 => 32 */ |
| |
| t = lxm * hym; /* 16 * 16 => 32 */ |
| at = lrm + (t << 16); |
| hrm += at < lrm; |
| lrm = at; |
| hrm = hrm + (t >> 16); |
| |
| t = hxm * lym; /* 16 * 16 => 32 */ |
| at = lrm + (t << 16); |
| hrm += at < lrm; |
| lrm = at; |
| hrm = hrm + (t >> 16); |
| |
| rm = hrm | (lrm != 0); |
| |
| /* |
| * Sticky shift down to normal rounding precision. |
| */ |
| if ((int) rm < 0) { |
| rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | |
| ((rm << (SP_FBITS + 1 + 3)) != 0); |
| re++; |
| } else { |
| rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | |
| ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); |
| } |
| assert(rm & (SP_HIDDEN_BIT << 3)); |
| |
| /* And now the addition */ |
| |
| assert(zm & SP_HIDDEN_BIT); |
| |
| /* |
| * Provide guard,round and stick bit space. |
| */ |
| zm <<= 3; |
| |
| if (ze > re) { |
| /* |
| * Have to shift r fraction right to align. |
| */ |
| s = ze - re; |
| rm = XSPSRS(rm, s); |
| re += s; |
| } else if (re > ze) { |
| /* |
| * Have to shift z fraction right to align. |
| */ |
| s = re - ze; |
| zm = XSPSRS(zm, s); |
| ze += s; |
| } |
| assert(ze == re); |
| assert(ze <= SP_EMAX); |
| |
| if (zs == rs) { |
| /* |
| * Generate 28 bit result of adding two 27 bit numbers |
| * leaving result in zm, zs and ze. |
| */ |
| zm = zm + rm; |
| |
| if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ |
| zm = XSPSRS1(zm); |
| ze++; |
| } |
| } else { |
| if (zm >= rm) { |
| zm = zm - rm; |
| } else { |
| zm = rm - zm; |
| zs = rs; |
| } |
| if (zm == 0) |
| return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); |
| |
| /* |
| * Normalize in extended single precision |
| */ |
| while ((zm >> (SP_MBITS + 3)) == 0) { |
| zm <<= 1; |
| ze--; |
| } |
| |
| } |
| return ieee754sp_format(zs, ze, zm); |
| } |
| |
| union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, |
| union ieee754sp y) |
| { |
| return _sp_maddf(z, x, y, 0); |
| } |
| |
| union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, |
| union ieee754sp y) |
| { |
| return _sp_maddf(z, x, y, maddf_negate_product); |
| } |