| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP |
| M68000 Hi-Performance Microprocessor Division |
| M68060 Software Package |
| Production Release P1.00 -- October 10, 1994 |
| |
| M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. |
| |
| THE SOFTWARE is provided on an "AS IS" basis and without warranty. |
| To the maximum extent permitted by applicable law, |
| MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, |
| INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE |
| and any warranty against infringement with regard to the SOFTWARE |
| (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. |
| |
| To the maximum extent permitted by applicable law, |
| IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER |
| (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, |
| BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) |
| ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. |
| Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. |
| |
| You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE |
| so long as this entire notice is retained without alteration in any modified and/or |
| redistributed versions, and that such modified versions are clearly identified as such. |
| No licenses are granted by implication, estoppel or otherwise under any patents |
| or trademarks of Motorola, Inc. |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| # litop.s: |
| # This file is appended to the top of the 060FPLSP package |
| # and contains the entry points into the package. The user, in |
| # effect, branches to one of the branch table entries located here. |
| # |
| |
| # Entry-point branch table. Each slot is a long branch to one library |
| # routine followed by a zero word of padding, so all slots share the |
| # same layout. Slot order: 64-bit divide (signed, unsigned), 64-bit |
| # multiply (signed, unsigned), then cmp2 for address registers (b/w/l) |
| # and data registers (b/w/l). |
| bra.l _060LSP__idivs64_ |
| short 0x0000 |
| bra.l _060LSP__idivu64_ |
| short 0x0000 |
| |
| bra.l _060LSP__imuls64_ |
| short 0x0000 |
| bra.l _060LSP__imulu64_ |
| short 0x0000 |
| |
| bra.l _060LSP__cmp2_Ab_ |
| short 0x0000 |
| bra.l _060LSP__cmp2_Aw_ |
| short 0x0000 |
| bra.l _060LSP__cmp2_Al_ |
| short 0x0000 |
| bra.l _060LSP__cmp2_Db_ |
| short 0x0000 |
| bra.l _060LSP__cmp2_Dw_ |
| short 0x0000 |
| bra.l _060LSP__cmp2_Dl_ |
| short 0x0000 |
| |
| # leave room for possible future additions to the table: the code that |
| # follows starts at the next 0x200 alignment boundary. |
| align 0x200 |
| |
| ######################################################################### |
| # XDEF **************************************************************** # |
| # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. # |
| # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. # |
| # # |
| # This is the library version which is accessed as a subroutine # |
| # and therefore does not work exactly like the 680X0 div{s,u}.l # |
| # 64-bit divide instruction. # |
| # # |
| # XREF **************************************************************** # |
| # None. # |
| # # |
| # INPUT *************************************************************** # |
| # 0x4(sp) = divisor # |
| # 0x8(sp) = hi(dividend) # |
| # 0xc(sp) = lo(dividend) # |
| # 0x10(sp) = pointer to location to place quotient/remainder # |
| # # |
| # OUTPUT ************************************************************** # |
| # 0x10(sp) = points to location of remainder/quotient. # |
| # remainder is in first longword, quotient is in 2nd. # |
| # # |
| # ALGORITHM *********************************************************** # |
| # If the operands are signed, make them unsigned and save the # |
| # sign info for later. Separate out special cases like divide-by-zero # |
| # or 32-bit divides if possible. Else, use a special math algorithm # |
| # to calculate the result. # |
| # Restore sign info if signed instruction. Set the condition # |
| # codes before performing the final "rts". If the divisor was equal to # |
| # zero, then perform a divide-by-zero using a 16-bit implemented # |
| # divide instruction. This way, the operating system can record that # |
| # the event occurred even though it may not point to the correct place. # |
| # # |
| ######################################################################### |
| |
| # Offsets of the frame locals, relative to %a6. Both entry points reserve |
| # 16 bytes with "link.w %a6,&-16". |
| # POSNEG - byte flag: set (st) for signed, clear (sf) for unsigned |
| # NDIVISOR - byte flag: set if the divisor was negative (signed only) |
| # NDIVIDEND - byte flag: set if the dividend was negative (signed only) |
| # DDSECOND - byte flag used by lddknuth: second quotient word in progress |
| # DDNORMAL - long: count of normalization shifts done by lddknuth |
| # DDQUOTIENT - long: quotient assembled one word at a time by lddknuth |
| # DIV64_CC - word: condition codes captured on entry |
| set POSNEG, -1 |
| set NDIVISOR, -2 |
| set NDIVIDEND, -3 |
| set DDSECOND, -4 |
| set DDNORMAL, -8 |
| set DDQUOTIENT, -12 |
| set DIV64_CC, -16 |
| |
| ########## |
| # divs.l # |
| ########## |
| # Signed entry point: builds the frame, records "signed" in POSNEG and |
| # joins the common code at ldiv64_cont. |
| global _060LSP__idivs64_ |
| _060LSP__idivs64_: |
| # PROLOGUE BEGIN ######################################################## |
| link.w %a6,&-16 |
| movm.l &0x3f00,-(%sp) # save d2-d7 |
| # fmovm.l &0x0,-(%sp) # save no fpregs |
| # PROLOGUE END ########################################################## |
| |
| mov.w %cc,DIV64_CC(%a6) |
| st POSNEG(%a6) # signed operation |
| bra.b ldiv64_cont |
| |
| ########## |
| # divu.l # |
| ########## |
| # Unsigned entry point: same frame, POSNEG cleared, falls through into |
| # the common code. |
| global _060LSP__idivu64_ |
| _060LSP__idivu64_: |
| # PROLOGUE BEGIN ######################################################## |
| link.w %a6,&-16 |
| movm.l &0x3f00,-(%sp) # save d2-d7 |
| # fmovm.l &0x0,-(%sp) # save no fpregs |
| # PROLOGUE END ########################################################## |
| |
| mov.w %cc,DIV64_CC(%a6) |
| sf POSNEG(%a6) # unsigned operation |
| |
| # Common body. After the link, the arguments documented as 0x4/0x8/0xc/ |
| # 0x10(sp) are read at 0x8/0xc/0x10/0x14(%a6). |
| # Register roles from here on: %d7 = divisor, %d5 = hi(dividend) and |
| # later the remainder, %d6 = lo(dividend) and later the quotient. |
| ldiv64_cont: |
| mov.l 0x8(%a6),%d7 # fetch divisor |
| |
| beq.w ldiv64eq0 # divisor is = 0!!! |
| |
| mov.l 0xc(%a6), %d5 # get dividend hi |
| mov.l 0x10(%a6), %d6 # get dividend lo |
| |
| # separate signed and unsigned divide |
| tst.b POSNEG(%a6) # signed or unsigned? |
| beq.b ldspecialcases # use positive divide |
| |
| # save the sign of the divisor |
| # make divisor unsigned if it's negative |
| tst.l %d7 # chk sign of divisor |
| slt NDIVISOR(%a6) # save sign of divisor |
| bpl.b ldsgndividend |
| neg.l %d7 # complement negative divisor |
| |
| # save the sign of the dividend |
| # make dividend unsigned if it's negative |
| ldsgndividend: |
| tst.l %d5 # chk sign of hi(dividend) |
| slt NDIVIDEND(%a6) # save sign of dividend |
| bpl.b ldspecialcases |
| |
| # 64-bit negate: X must be clear going in so the first negx.l is a plain |
| # negate of the low longword and its borrow feeds the high longword. |
| mov.w &0x0, %cc # clear 'X' cc bit |
| negx.l %d6 # complement signed dividend |
| negx.l %d5 |
| |
| # extract some special cases: |
| # - is (dividend == 0) ? |
| # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div) |
| # NOTE(review): the cmp/cmpi operand order in this assembler syntax looks |
| # reversed relative to standard Motorola order (see the cmpi below that has |
| # the immediate as its second operand); the existing comments on the |
| # compares are written accordingly - confirm against the assembler manual. |
| ldspecialcases: |
| tst.l %d5 # is (hi(dividend) == 0) |
| bne.b ldnormaldivide # no, so try it the long way |
| |
| tst.l %d6 # is (lo(dividend) == 0), too |
| beq.w lddone # yes, so (dividend == 0) |
| |
| cmp.l %d7,%d6 # is (divisor <= lo(dividend)) |
| bls.b ld32bitdivide # yes, so use 32 bit divide |
| |
| exg %d5,%d6 # q = 0, r = dividend |
| bra.w ldivfinish # can't divide, we're done. |
| |
| ld32bitdivide: |
| tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div! |
| |
| bra.b ldivfinish |
| |
| ldnormaldivide: |
| # last special case: |
| # - is hi(dividend) >= divisor ? if yes, then overflow |
| cmp.l %d7,%d5 |
| bls.b lddovf # answer won't fit in 32 bits |
| |
| # perform the divide algorithm: |
| # (returns remainder in %d5 and quotient in %d6) |
| bsr.l ldclassical # do int divide |
| |
| # separate into signed and unsigned finishes. |
| ldivfinish: |
| tst.b POSNEG(%a6) # do divs, divu separately |
| beq.b lddone # divu has no processing!!! |
| |
| # it was a divs.l, so ccode setting is a little more complicated... |
| tst.b NDIVIDEND(%a6) # remainder has same sign |
| beq.b ldcc # as dividend. |
| neg.l %d5 # sgn(rem) = sgn(dividend) |
| ldcc: |
| # the eor overwrites NDIVIDEND with (divisor sign xor dividend sign); |
| # a nonzero result means the quotient must be negative. |
| mov.b NDIVISOR(%a6), %d0 |
| eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative |
| beq.b ldqpos # branch to quot positive |
| |
| # 0x80000000 is the largest number representable as a 32-bit negative |
| # number. the negative of 0x80000000 is 0x80000000. |
| cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits? |
| bhi.b lddovf |
| |
| neg.l %d6 # make (-quot) 2's comp |
| |
| bra.b lddone |
| |
| ldqpos: |
| btst &0x1f, %d6 # will (+quot) fit in 32 bits? |
| bne.b lddovf |
| |
| # Normal completion: keep only the caller's 'X' bit from the saved ccodes, |
| # load that into the ccr, then let tst.l on the quotient produce the |
| # result-dependent bits. |
| lddone: |
| andi.w &0x10,DIV64_CC(%a6) |
| mov.w DIV64_CC(%a6),%cc |
| tst.l %d6 # may set 'N' ccode bit |
| |
| # here, the remainder is in d5 and the quotient is in d6. they are stored |
| # through the result pointer argument at 0x14(%a6): remainder in the |
| # first longword, quotient in the second. |
| # use movm here to not disturb the condition codes. |
| ldexit: |
| movm.l &0x0060,([0x14,%a6]) # save result |
| |
| # EPILOGUE BEGIN ######################################################## |
| # fmovm.l (%sp)+,&0x0 # restore no fpregs |
| movm.l (%sp)+,&0x00fc # restore d2-d7 |
| unlk %a6 |
| # EPILOGUE END ########################################################## |
| |
| rts |
| |
| # Overflow exit: the quotient does not fit in 32 bits. |
| # the result should be the unchanged dividend |
| lddovf: |
| mov.l 0xc(%a6), %d5 # get dividend hi |
| mov.l 0x10(%a6), %d6 # get dividend lo |
| |
| andi.w &0x1c,DIV64_CC(%a6) # keep the entry 'X','N','Z' bits |
| ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit |
| mov.w DIV64_CC(%a6),%cc |
| |
| bra.b ldexit |
| |
| # Divide-by-zero exit: copy the unchanged dividend to the result area, |
| # restore the entry ccodes and registers, then execute a real divide by |
| # zero so the exception is raised. If the handler returns, the rts |
| # following the divu.w returns to the caller. |
| ldiv64eq0: |
| mov.l 0xc(%a6),([0x14,%a6]) |
| mov.l 0x10(%a6),([0x14,%a6],0x4) |
| |
| mov.w DIV64_CC(%a6),%cc |
| |
| # EPILOGUE BEGIN ######################################################## |
| # fmovm.l (%sp)+,&0x0 # restore no fpregs |
| movm.l (%sp)+,&0x00fc # restore d2-d7 |
| unlk %a6 |
| # EPILOGUE END ########################################################## |
| |
| divu.w &0x0,%d0 # force a divbyzero exception |
| rts |
| |
| ########################################################################### |
| ######################################################################### |
| # This routine uses the 'classical' Algorithm D from Donald Knuth's # |
| # Art of Computer Programming, vol II, Seminumerical Algorithms. # |
| # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, # |
| # where U,V are words of the quadword dividend and longword divisor, # |
| # and U1, V1 are the most significant words. # |
| # # |
| # The most sig. longword of the 64 bit dividend must be in %d5, least # |
| # in %d6. The divisor must be in %d7. Both are treated as unsigned. # |
| # The quotient is returned in %d6, remainder in %d5. # |
| # Overflow is not detected here: the caller rejects the case # |
| # hi(dividend) >= divisor before calling. # |
| # Registers %d0-%d4 and %d7 may be overwritten, and the DDNORMAL, # |
| # DDSECOND and DDQUOTIENT frame locals are accessed through %a6. # |
| ######################################################################### |
| ldclassical: |
| # if the divisor msw is 0, use simpler algorithm than the full blown |
| # one at lddknuth: |
| |
| cmpi.l %d7, &0xffff |
| bhi.b lddknuth # go use D. Knuth algorithm |
| |
| # Since the divisor is only a word (and larger than the mslw of the dividend), |
| # a simpler algorithm may be used : |
| # In the general case, four quotient words would be created by |
| # dividing the divisor word into each dividend word. In this case, |
| # the first two quotient words must be zero, or overflow would occur. |
| # Since we already checked this case above, we can treat the most significant |
| # longword of the dividend as (0) remainder (see Knuth) and merely complete |
| # the last two divisions to get a quotient longword and word remainder: |
| |
| clr.l %d1 |
| swap %d5 # same as r*b if previous step rqd |
| swap %d6 # get u3 to lsw position |
| mov.w %d6, %d5 # rb + u3 |
| |
| divu.w %d7, %d5 |
| |
| # divu.w leaves the quotient in the low word and the remainder in the |
| # high word of %d5, so the remainder is already positioned as r*b for |
| # the next step. |
| mov.w %d5, %d1 # first quotient word |
| swap %d6 # get u4 |
| mov.w %d6, %d5 # rb + u4 |
| |
| divu.w %d7, %d5 |
| |
| swap %d1 |
| mov.w %d5, %d1 # 2nd quotient 'digit' |
| clr.w %d5 |
| swap %d5 # now remainder |
| mov.l %d1, %d6 # and quotient |
| |
| rts |
| |
| lddknuth: |
| # In this algorithm, the divisor is treated as a 2 digit (word) number |
| # which is divided into a 3 digit (word) dividend to get one quotient |
| # digit (word). After subtraction, the dividend is shifted and the |
| # process repeated. Before beginning, the divisor and dividend are |
| # 'normalized' so that the process of estimating the quotient digit |
| # will yield verifiably correct results.. |
| |
| clr.l DDNORMAL(%a6) # count of shifts for normalization |
| clr.b DDSECOND(%a6) # clear flag for quotient digits |
| clr.l %d1 # %d1 will hold trial quotient |
| # shift divisor and 64-bit dividend left together until the divisor's |
| # top bit is set, counting the shifts in DDNORMAL. |
| lddnchk: |
| btst &31, %d7 # must we normalize? first word of |
| bne.b lddnormalized # divisor (V1) must be >= 65536/2 |
| addq.l &0x1, DDNORMAL(%a6) # count normalization shifts |
| lsl.l &0x1, %d7 # shift the divisor |
| lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2 |
| roxl.l &0x1, %d5 # shift u1,u2 |
| bra.w lddnchk |
| # This label is also the top of the per-digit loop: it is entered a |
| # second time from ldd2nd for the second quotient word. |
| lddnormalized: |
| |
| # Now calculate an estimate of the quotient words (msw first, then lsw). |
| # The comments use subscripts for the first quotient digit determination. |
| mov.l %d7, %d3 # divisor |
| mov.l %d5, %d2 # dividend mslw |
| swap %d2 |
| swap %d3 |
| cmp.w %d2, %d3 # V1 = U1 ? |
| bne.b lddqcalc1 |
| mov.w &0xffff, %d1 # use max trial quotient word |
| bra.b lddadj0 |
| lddqcalc1: |
| mov.l %d5, %d1 |
| |
| divu.w %d3, %d1 # use quotient of mslw/msw |
| |
| andi.l &0x0000ffff, %d1 # zero any remainder |
| lddadj0: |
| |
| # now test the trial quotient and adjust. This step plus the |
| # normalization assures (according to Knuth) that the trial |
| # quotient will be at worst 1 too large. |
| # %d6 is pushed here and popped again after the ldmm2 call below. |
| mov.l %d6, -(%sp) |
| clr.w %d6 # word u3 left |
| swap %d6 # in lsw position |
| lddadj1: mov.l %d7, %d3 |
| mov.l %d1, %d2 |
| mulu.w %d7, %d2 # V2q |
| swap %d3 |
| mulu.w %d1, %d3 # V1q |
| mov.l %d5, %d4 # U1U2 |
| sub.l %d3, %d4 # U1U2 - V1q |
| |
| swap %d4 |
| |
| mov.w %d4,%d0 |
| mov.w %d6,%d4 # insert lower word (U3) |
| |
| # if the upper word of (U1U2 - V1q) is nonzero, the adjustment loop is |
| # left without comparing against V2q. |
| tst.w %d0 # is upper word set? |
| bne.w lddadjd1 |
| |
| # add.l %d6, %d4 # (U1U2 - V1q) + U3 |
| |
| cmp.l %d2, %d4 |
| bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ? |
| subq.l &0x1, %d1 # yes, decrement and recheck |
| bra.b lddadj1 |
| lddadjd1: |
| # now test the word by multiplying it by the divisor (V1V2) and comparing |
| # the 3 digit (word) result with the current dividend words |
| mov.l %d5, -(%sp) # save %d5 (%d6 already saved) |
| mov.l %d1, %d6 |
| swap %d6 # shift answer to ms 3 words |
| mov.l %d7, %d5 |
| bsr.l ldmm2 |
| mov.l %d5, %d2 # now %d2,%d3 are trial*divisor |
| mov.l %d6, %d3 |
| mov.l (%sp)+, %d5 # restore dividend |
| mov.l (%sp)+, %d6 |
| sub.l %d3, %d6 |
| subx.l %d2, %d5 # subtract double precision |
| bcc ldd2nd # no carry, do next quotient digit |
| subq.l &0x1, %d1 # q is one too large |
| # need to add back divisor longword to current ms 3 digits of dividend |
| # - according to Knuth, this is done only 2 out of 65536 times for random |
| # divisor, dividend selection. |
| clr.l %d2 |
| mov.l %d7, %d3 |
| swap %d3 |
| clr.w %d3 # %d3 now ls word of divisor |
| add.l %d3, %d6 # aligned with 3rd word of dividend |
| addx.l %d2, %d5 |
| mov.l %d7, %d3 |
| clr.w %d3 # %d3 now ms word of divisor |
| swap %d3 # aligned with 2nd word of dividend |
| add.l %d3, %d5 |
| ldd2nd: |
| tst.b DDSECOND(%a6) # both q words done? |
| bne.b lddremain |
| # first quotient digit now correct. store digit and shift the |
| # (subtracted) dividend |
| mov.w %d1, DDQUOTIENT(%a6) |
| clr.l %d1 |
| swap %d5 |
| swap %d6 |
| mov.w %d6, %d5 |
| clr.w %d6 |
| st DDSECOND(%a6) # second digit |
| bra.w lddnormalized |
| lddremain: |
| # add 2nd word to quotient, get the remainder. |
| mov.w %d1, DDQUOTIENT+2(%a6) |
| # shift down one word/digit to renormalize remainder. |
| mov.w %d5, %d6 |
| swap %d6 |
| swap %d5 |
| # undo the normalization: shift the %d5:%d6 pair right once per recorded |
| # shift. dbf loops count+1 times, hence the subq before the loop. |
| mov.l DDNORMAL(%a6), %d7 # get norm shift count |
| beq.b lddrn |
| subq.l &0x1, %d7 # set for loop count |
| lddnlp: |
| lsr.l &0x1, %d5 # shift into %d6 |
| roxr.l &0x1, %d6 |
| dbf %d7, lddnlp |
| lddrn: |
| mov.l %d6, %d5 # remainder |
| mov.l DDQUOTIENT(%a6), %d6 # quotient |
| |
| rts |
| ldmm2: |
| # unsigned 32X32->64 multiplication helper. |
| # factors for the multiplication are in %d5 and %d6. |
| # returns 64 bit result in %d5 (hi) %d6(lo). |
| # destroys %d2,%d3,%d4. |
| |
| # multiply hi,lo words of each factor to get 4 intermediate products |
| mov.l %d6, %d2 |
| mov.l %d6, %d3 |
| mov.l %d5, %d4 |
| swap %d3 |
| swap %d4 |
| mulu.w %d5, %d6 # %d6 <- lsw*lsw |
| mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source |
| mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest |
| mulu.w %d4, %d3 # %d3 <- msw*msw |
| # now use swap and addx to consolidate to two longwords. |
| # the carries are folded into the m*m product with longword addx so that |
| # a carry out of its low word propagates into its high word (a word-sized |
| # addx would drop it when the low word of m*m is 0xffff). %d4 is cleared |
| # as a full longword, so each addx.l adds exactly the 'X' bit. |
| clr.l %d4 |
| swap %d6 |
| add.w %d5, %d6 # add msw of l*l to lsw of m*l product |
| addx.l %d4, %d3 # add any carry to m*m product |
| add.w %d2, %d6 # add in lsw of other m*l product |
| addx.l %d4, %d3 # add any carry to m*m product |
| swap %d6 # %d6 is low 32 bits of final product |
| clr.w %d5 |
| clr.w %d2 # lsw of two mixed products used, |
| swap %d5 # now use msws of longwords |
| swap %d2 |
| add.l %d2, %d5 |
| add.l %d3, %d5 # %d5 now ms 32 bits of final product |
| rts |
| |
| ######################################################################### |
| # XDEF **************************************************************** # |
| # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction # |
| # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. # |
| # # |
| # This is the library version which is accessed as a subroutine # |
| # and therefore does not work exactly like the 680X0 mul{s,u}.l # |
| # 64-bit multiply instruction. # |
| # # |
| # XREF **************************************************************** # |
| # None # |
| # # |
| # INPUT *************************************************************** # |
| # 0x4(sp) = multiplier # |
| # 0x8(sp) = multiplicand # |
| # 0xc(sp) = pointer to location to place 64-bit result # |
| # # |
| # OUTPUT ************************************************************** # |
| # 0xc(sp) = points to location of 64-bit result # |
| # # |
| # ALGORITHM *********************************************************** # |
| # Perform the multiply in pieces using 16x16->32 unsigned # |
| # multiplies and "add" instructions. # |
| # Set the condition codes as appropriate before performing an # |
| # "rts". # |
| # # |
| ######################################################################### |
| |
| # Frame-local offset (relative to %a6) of the condition codes captured |
| # on entry; the multiply entry points reserve 4 bytes with link.w. |
| set MUL64_CC, -4 |
| |
| # Unsigned 32x32->64 multiply. After the link, the arguments documented |
| # as 0x4/0x8/0xc(sp) are read at 0x8/0xc/0x10(%a6). |
| global _060LSP__imulu64_ |
| _060LSP__imulu64_: |
| |
| # PROLOGUE BEGIN ######################################################## |
| link.w %a6,&-4 |
| movm.l &0x3800,-(%sp) # save d2-d4 |
| # fmovm.l &0x0,-(%sp) # save no fpregs |
| # PROLOGUE END ########################################################## |
| |
| mov.w %cc,MUL64_CC(%a6) # save incoming ccodes |
| |
| mov.l 0x8(%a6),%d0 # store multiplier in d0 |
| beq.w mulu64_zero # handle zero separately |
| |
| mov.l 0xc(%a6),%d1 # get multiplicand in d1 |
| beq.w mulu64_zero # handle zero separately |
| |
| ######################################################################### |
| # 63 32 0 # |
| # ---------------------------- # |
| # | hi(mplier) * hi(mplicand)| # |
| # ---------------------------- # |
| # ----------------------------- # |
| # | hi(mplier) * lo(mplicand) | # |
| # ----------------------------- # |
| # ----------------------------- # |
| # | lo(mplier) * hi(mplicand) | # |
| # ----------------------------- # |
| # | ----------------------------- # |
| # --|-- | lo(mplier) * lo(mplicand) | # |
| # | ----------------------------- # |
| # ======================================================== # |
| # -------------------------------------------------------- # |
| # | hi(result) | lo(result) | # |
| # -------------------------------------------------------- # |
| ######################################################################### |
| mulu64_alg: |
| # load temp registers with operands |
| mov.l %d0,%d2 # mr in d2 |
| mov.l %d0,%d3 # mr in d3 |
| mov.l %d1,%d4 # md in d4 |
| swap %d3 # hi(mr) in lo d3 |
| swap %d4 # hi(md) in lo d4 |
| |
| # complete necessary multiplies: |
| mulu.w %d1,%d0 # [1] lo(mr) * lo(md) |
| mulu.w %d3,%d1 # [2] hi(mr) * lo(md) |
| mulu.w %d4,%d2 # [3] lo(mr) * hi(md) |
| mulu.w %d4,%d3 # [4] hi(mr) * hi(md) |
| |
| # add lo portions of [2],[3] to hi portion of [1]. |
| # add carries produced from these adds to [4]. |
| # lo([1]) is the final lo 16 bits of the result. |
| clr.l %d4 # load d4 w/ zero value |
| swap %d0 # hi([1]) <==> lo([1]) |
| add.w %d1,%d0 # hi([1]) + lo([2]) |
| addx.l %d4,%d3 # [4] + carry |
| add.w %d2,%d0 # hi([1]) + lo([3]) |
| addx.l %d4,%d3 # [4] + carry |
| swap %d0 # lo([1]) <==> hi([1]) |
| |
| # lo portions of [2],[3] have been added in to final result. |
| # now, clear lo, put hi in lo reg, and add to [4] |
| clr.w %d1 # clear lo([2]) |
| clr.w %d2 # clear lo([3]) |
| swap %d1 # hi([2]) in lo d1 |
| swap %d2 # hi([3]) in lo d2 |
| add.l %d2,%d1 # hi([2]) + hi([3]) |
| add.l %d3,%d1 # + [4] (incl. carries) = hi(result) |
| |
| # now, grab the condition codes. only one that can be set is 'N'. |
| # 'N' CAN be set if the operation is unsigned if bit 63 is set. |
| # the new ccodes are built in d4: the caller's 'X' bit plus 'N' when |
| # hi(result) is negative. |
| mov.w MUL64_CC(%a6),%d4 |
| andi.b &0x10,%d4 # keep old 'X' bit |
| tst.l %d1 # may set 'N' bit |
| bpl.b mulu64_ddone |
| ori.b &0x8,%d4 # set 'N' bit |
| mulu64_ddone: |
| mov.w %d4,%cc |
| |
| # here, hi(result) is in d1 and lo(result) is in d0. they are swapped so |
| # that d0 = hi and d1 = lo, then stored in that order through the result |
| # pointer argument at 0x10(%a6). |
| # use movm here to not disturb the condition codes. |
| mulu64_end: |
| exg %d1,%d0 |
| movm.l &0x0003,([0x10,%a6]) # save result |
| |
| # EPILOGUE BEGIN ######################################################## |
| # fmovm.l (%sp)+,&0x0 # restore no fpregs |
| movm.l (%sp)+,&0x001c # restore d2-d4 |
| unlk %a6 |
| # EPILOGUE END ########################################################## |
| |
| rts |
| |
| # one or both of the operands is zero so the result is also zero. |
| # store a zero result and return with the old 'X' bit plus the 'Z' ccode |
| # bit set. |
| mulu64_zero: |
| clr.l %d0 |
| clr.l %d1 |
| |
| mov.w MUL64_CC(%a6),%d4 |
| andi.b &0x10,%d4 |
| ori.b &0x4,%d4 |
| mov.w %d4,%cc # set 'Z' ccode bit |
| |
| bra.b mulu64_end |
| |
| ########## |
| # muls.l # |
| ########## |
| # Signed 32x32->64 multiply. After the link, the arguments documented |
| # as 0x4/0x8/0xc(sp) are read at 0x8/0xc/0x10(%a6). The operands are |
| # made positive, multiplied as unsigned values, and the 64-bit product |
| # is negated at the end if exactly one operand was negative. |
| global _060LSP__imuls64_ |
| _060LSP__imuls64_: |
| |
| # PROLOGUE BEGIN ######################################################## |
| link.w %a6,&-4 |
| movm.l &0x3c00,-(%sp) # save d2-d5 |
| # fmovm.l &0x0,-(%sp) # save no fpregs |
| # PROLOGUE END ########################################################## |
| |
| mov.w %cc,MUL64_CC(%a6) # save incoming ccodes |
| |
| # zero operands exit through this routine's own zero handler, so the |
| # epilogue that runs restores the same d2-d5 set the prologue saved. |
| # word-sized branches are used because the handler sits after the |
| # whole multiply body. |
| mov.l 0x8(%a6),%d0 # store multiplier in d0 |
| beq.w muls64_zero # handle zero separately |
| |
| mov.l 0xc(%a6),%d1 # get multiplicand in d1 |
| beq.w muls64_zero # handle zero separately |
| |
| clr.b %d5 # clear sign tag |
| tst.l %d0 # is multiplier negative? |
| bge.b muls64_chk_md_sgn # no |
| neg.l %d0 # make multiplier positive |
| |
| ori.b &0x1,%d5 # save multiplier sgn |
| |
| # the result sign is the exclusive or of the operand sign bits. |
| muls64_chk_md_sgn: |
| tst.l %d1 # is multiplicand negative? |
| bge.b muls64_alg # no |
| neg.l %d1 # make multiplicand positive |
| |
| eori.b &0x1,%d5 # calculate correct sign |
| |
| ######################################################################### |
| # 63 32 0 # |
| # ---------------------------- # |
| # | hi(mplier) * hi(mplicand)| # |
| # ---------------------------- # |
| # ----------------------------- # |
| # | hi(mplier) * lo(mplicand) | # |
| # ----------------------------- # |
| # ----------------------------- # |
| # | lo(mplier) * hi(mplicand) | # |
| # ----------------------------- # |
| # | ----------------------------- # |
| # --|-- | lo(mplier) * lo(mplicand) | # |
| # | ----------------------------- # |
| # ======================================================== # |
| # -------------------------------------------------------- # |
| # | hi(result) | lo(result) | # |
| # -------------------------------------------------------- # |
| ######################################################################### |
| muls64_alg: |
| # load temp registers with operands |
| mov.l %d0,%d2 # mr in d2 |
| mov.l %d0,%d3 # mr in d3 |
| mov.l %d1,%d4 # md in d4 |
| swap %d3 # hi(mr) in lo d3 |
| swap %d4 # hi(md) in lo d4 |
| |
| # complete necessary multiplies: |
| mulu.w %d1,%d0 # [1] lo(mr) * lo(md) |
| mulu.w %d3,%d1 # [2] hi(mr) * lo(md) |
| mulu.w %d4,%d2 # [3] lo(mr) * hi(md) |
| mulu.w %d4,%d3 # [4] hi(mr) * hi(md) |
| |
| # add lo portions of [2],[3] to hi portion of [1]. |
| # add carries produced from these adds to [4]. |
| # lo([1]) is the final lo 16 bits of the result. |
| clr.l %d4 # load d4 w/ zero value |
| swap %d0 # hi([1]) <==> lo([1]) |
| add.w %d1,%d0 # hi([1]) + lo([2]) |
| addx.l %d4,%d3 # [4] + carry |
| add.w %d2,%d0 # hi([1]) + lo([3]) |
| addx.l %d4,%d3 # [4] + carry |
| swap %d0 # lo([1]) <==> hi([1]) |
| |
| # lo portions of [2],[3] have been added in to final result. |
| # now, clear lo, put hi in lo reg, and add to [4] |
| clr.w %d1 # clear lo([2]) |
| clr.w %d2 # clear lo([3]) |
| swap %d1 # hi([2]) in lo d1 |
| swap %d2 # hi([3]) in lo d2 |
| add.l %d2,%d1 # hi([2]) + hi([3]) |
| add.l %d3,%d1 # + [4] (incl. carries) = hi(result) |
| |
| tst.b %d5 # should result be signed? |
| beq.b muls64_done # no |
| |
| # result should be a signed negative number. |
| # compute 2's complement of the unsigned number: |
| # -negate all bits and add 1 |
| # d4 is still zero here, so the addx.l adds only the carry out of lo. |
| muls64_neg: |
| not.l %d0 # negate lo(result) bits |
| not.l %d1 # negate hi(result) bits |
| addq.l &1,%d0 # add 1 to lo(result) |
| addx.l %d4,%d1 # add carry to hi(result) |
| |
| # build the new ccodes in d4: the caller's 'X' bit plus 'N' when |
| # hi(result) is negative. |
| muls64_done: |
| mov.w MUL64_CC(%a6),%d4 |
| andi.b &0x10,%d4 # keep old 'X' bit |
| tst.l %d1 # may set 'N' bit |
| bpl.b muls64_ddone |
| ori.b &0x8,%d4 # set 'N' bit |
| muls64_ddone: |
| mov.w %d4,%cc |
| |
| # here, hi(result) is in d1 and lo(result) is in d0. they are swapped so |
| # that d0 = hi and d1 = lo, then stored in that order through the result |
| # pointer argument at 0x10(%a6). |
| # use movm here to not disturb the condition codes. |
| muls64_end: |
| exg %d1,%d0 |
| movm.l &0x0003,([0x10,%a6]) # save result |
| |
| # EPILOGUE BEGIN ######################################################## |
| # fmovm.l (%sp)+,&0x0 # restore no fpregs |
| movm.l (%sp)+,&0x003c # restore d2-d5 |
| unlk %a6 |
| # EPILOGUE END ########################################################## |
| |
| rts |
| |
| # one or both of the operands is zero so the result is also zero. |
| # store a zero result and return with the old 'X' bit plus the 'Z' ccode |
| # bit set. |
| muls64_zero: |
| clr.l %d0 |
| clr.l %d1 |
| |
| mov.w MUL64_CC(%a6),%d4 |
| andi.b &0x10,%d4 |
| ori.b &0x4,%d4 |
| mov.w %d4,%cc # set 'Z' ccode bit |
| |
| bra.b muls64_end |
| |
| ######################################################################### |
| # XDEF **************************************************************** # |
| # _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". # |
| # _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". # |
| # _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". # |
| # _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". # |
| # _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". # |
| # _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". # |
| # # |
| # This is the library version which is accessed as a subroutine # |
| # and therefore does not work exactly like the 680X0 "cmp2" # |
| # instruction. # |
| # # |
| # XREF **************************************************************** # |
| # None # |
| # # |
| # INPUT *************************************************************** # |
| # 0x4(sp) = Rn # |
| # 0x8(sp) = pointer to boundary pair # |
| # # |
| # OUTPUT ************************************************************** # |
| # cc = condition codes are set correctly # |
| # # |
| # ALGORITHM *********************************************************** # |
| # In the interest of simplicity, all operands are converted to # |
| # longword size whether the operation is byte, word, or long. The # |
| # bounds are sign extended accordingly. If Rn is a data register, Rn is # |
| # also sign extended. If Rn is an address register, it need not be sign # |
| # extended since the full register is always used. # |
| # The condition codes are set correctly before the final "rts". # |
| # # |
| ######################################################################### |
| |
| # frame slot (relative to %a6) holding the ccodes captured on entry |
| set CMP2_CC, -4 |
| |
| # cmp2.b against an address register value: the two byte bounds are |
| # widened to longwords; the register value is compared at full width. |
| global _060LSP__cmp2_Ab_ |
| _060LSP__cmp2_Ab_: |
| |
| # ---- frame setup ------------------------------------------------------ |
| link.w %a6,&-4 |
| movm.l &0x3800,-(%sp) # preserve d2-d4 |
| |
| mov.w %cc,CMP2_CC(%a6) # capture ccodes before they change |
| |
| # ---- bounds: fetch and widen one at a time ---------------------------- |
| mov.b ([0xc,%a6],0x0),%d0 # lower bound byte |
| extb.l %d0 # lower bound -> signed longword |
| mov.b ([0xc,%a6],0x1),%d1 # upper bound byte |
| extb.l %d1 # upper bound -> signed longword |
| |
| # ---- register operand ------------------------------------------------- |
| mov.l 0x8(%a6), %d2 # Rn value, used as a full longword |
| bra.w l_cmp2_cmp # shared compare and ccode merge |
| |
| # cmp2.w against an address register value: the two word bounds are |
| # widened to longwords; the register value is compared at full width. |
| global _060LSP__cmp2_Aw_ |
| _060LSP__cmp2_Aw_: |
| |
| # ---- frame setup ------------------------------------------------------ |
| link.w %a6,&-4 |
| movm.l &0x3800,-(%sp) # preserve d2-d4 |
| |
| mov.w %cc,CMP2_CC(%a6) # capture ccodes before they change |
| |
| # ---- bounds: fetch and widen one at a time ---------------------------- |
| mov.w ([0xc,%a6],0x0),%d0 # lower bound word |
| ext.l %d0 # lower bound -> signed longword |
| mov.w ([0xc,%a6],0x2),%d1 # upper bound word |
| ext.l %d1 # upper bound -> signed longword |
| |
| # ---- register operand ------------------------------------------------- |
| mov.l 0x8(%a6), %d2 # Rn value, used as a full longword |
| bra.w l_cmp2_cmp # shared compare and ccode merge |
| |
| # cmp2.l against an address register value: bounds and register value |
| # are already longwords, so nothing needs to be extended. |
| global _060LSP__cmp2_Al_ |
| _060LSP__cmp2_Al_: |
| |
| # ---- frame setup ------------------------------------------------------ |
| link.w %a6,&-4 |
| movm.l &0x3800,-(%sp) # preserve d2-d4 |
| |
| mov.w %cc,CMP2_CC(%a6) # capture ccodes before they change |
| |
| # ---- operands --------------------------------------------------------- |
| mov.l ([0xc,%a6],0x0),%d0 # lower bound |
| mov.l ([0xc,%a6],0x4),%d1 # upper bound |
| mov.l 0x8(%a6), %d2 # Rn value |
| bra.w l_cmp2_cmp # shared compare and ccode merge |
| |
| # cmp2.b against a data register value: the register byte and both byte |
| # bounds are sign extended so that plain longword compares can be used. |
| global _060LSP__cmp2_Db_ |
| _060LSP__cmp2_Db_: |
| |
| # ---- frame setup ------------------------------------------------------ |
| link.w %a6,&-4 |
| movm.l &0x3800,-(%sp) # preserve d2-d4 |
| |
| mov.w %cc,CMP2_CC(%a6) # capture ccodes before they change |
| |
| # ---- register operand ------------------------------------------------- |
| mov.l 0x8(%a6), %d2 # Dn value |
| extb.l %d2 # only the low byte takes part |
| |
| # ---- bounds: fetch and widen one at a time ---------------------------- |
| mov.b ([0xc,%a6],0x0),%d0 # lower bound byte |
| extb.l %d0 # lower bound -> signed longword |
| mov.b ([0xc,%a6],0x1),%d1 # upper bound byte |
| extb.l %d1 # upper bound -> signed longword |
| bra.w l_cmp2_cmp # shared compare and ccode merge |
| |
| # cmp2.w against a data register value: the register word and both word |
| # bounds are sign extended so that plain longword compares can be used. |
| global _060LSP__cmp2_Dw_ |
| _060LSP__cmp2_Dw_: |
| |
| # ---- frame setup ------------------------------------------------------ |
| link.w %a6,&-4 |
| movm.l &0x3800,-(%sp) # preserve d2-d4 |
| |
| mov.w %cc,CMP2_CC(%a6) # capture ccodes before they change |
| |
| # ---- register operand ------------------------------------------------- |
| mov.l 0x8(%a6), %d2 # Dn value |
| ext.l %d2 # only the low word takes part |
| |
| # ---- bounds: fetch and widen one at a time ---------------------------- |
| mov.w ([0xc,%a6],0x0),%d0 # lower bound word |
| ext.l %d0 # lower bound -> signed longword |
| mov.w ([0xc,%a6],0x2),%d1 # upper bound word |
| ext.l %d1 # upper bound -> signed longword |
| bra.w l_cmp2_cmp # shared compare and ccode merge |
| |
| # cmp2.l against a data register value. Operands are already longwords, |
| # so this entry point loads them and falls straight through into the |
| # shared compare code at l_cmp2_cmp. |
| global _060LSP__cmp2_Dl_ |
| _060LSP__cmp2_Dl_: |
| |
| # PROLOGUE BEGIN ######################################################## |
| link.w %a6,&-4 |
| movm.l &0x3800,-(%sp) # save d2-d4 |
| # fmovm.l &0x0,-(%sp) # save no fpregs |
| # PROLOGUE END ########################################################## |
| |
| mov.w %cc,CMP2_CC(%a6) |
| mov.l 0x8(%a6), %d2 # get regval |
| |
| mov.l ([0xc,%a6],0x0),%d0 |
| mov.l ([0xc,%a6],0x4),%d1 |
| |
| # |
| # Shared tail for all six cmp2 entry points. |
| # On entry: d0 = lo bound, d1 = hi bound, d2 = Rn (all longwords), |
| # CMP2_CC(%a6) = ccodes captured on entry, d2-d4 saved on the stack. |
| # |
| # To set the ccodes correctly: |
| # (1) save 'Z' bit from (Rn - lo) |
| # (2) save 'Z' and 'C' bits from the compare of (hi - lo) with (Rn - lo) |
| # (3) keep 'X', 'N', and 'V' from before instruction |
| # (4) combine ccodes |
| # |
| # NOTE(review): the cmp operand order in this assembler syntax looks |
| # reversed relative to standard Motorola order - confirm which operand is |
| # subtracted from which before relying on the direction stated in (2). |
| # |
| l_cmp2_cmp: |
| sub.l %d0, %d2 # (Rn - lo) |
| mov.w %cc, %d3 # fetch resulting ccodes |
| andi.b &0x4, %d3 # keep 'Z' bit |
| sub.l %d0, %d1 # (hi - lo) |
| cmp.l %d1,%d2 # compare (hi - lo) with (Rn - lo) |
| |
| mov.w %cc, %d4 # fetch resulting ccodes |
| or.b %d4, %d3 # combine w/ earlier ccodes |
| andi.b &0x5, %d3 # keep 'Z' and 'C' |
| |
| mov.w CMP2_CC(%a6), %d4 # fetch old ccodes |
| andi.b &0x1a, %d4 # keep 'X','N','V' bits |
| or.b %d3, %d4 # insert new ccodes |
| mov.w %d4,%cc # save new ccodes |
| |
| # EPILOGUE BEGIN ######################################################## |
| # fmovm.l (%sp)+,&0x0 # restore no fpregs |
| movm.l (%sp)+,&0x001c # restore d2-d4 |
| unlk %a6 |
| # EPILOGUE END ########################################################## |
| |
| rts |