/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico at cam.org>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <linux/linkage.h> | |

#include <asm/assembler.h> | |

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 *   dividend  in:      value to divide.  Clobbered: it is used as the
 *                      running remainder and its final value should not
 *                      be relied on.
 *   divisor   in:      value to divide by.  Clobbered (shifted up during
 *                      setup, then shifted back down by the loop).
 *   result    out:     quotient.
 *   curbit    scratch: quotient bit that matches the current divisor
 *                      shift.
 *
 * The callers in this file only expand it with dividend > divisor > 1 and
 * a divisor that is not a power of two.  The clz path uses
 * clz(divisor) - clz(dividend) as a shift count, so it needs that
 * difference to be non-negative.  Condition flags are clobbered.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend in one step, and give curbit the same shift.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: each pass tries the divisor at four consecutive
	@ shifts (0, 1, 2, 3 bits right).  Whenever the shifted divisor
	@ fits, it is subtracted and the matching quotient bit is set.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm

/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute the bit index of the set bit of a power-of-two divisor, so a
 * caller can replace the division by a logical shift right.
 *
 *   divisor  in:  power of two (the callers in this file check this with
 *                 a tst of divisor against divisor - 1 before expanding
 *                 the macro).  Clobbered on the pre-ARMv5 path.
 *   order    out: shift count.
 *
 * Condition flags are clobbered on the pre-ARMv5 path.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	@ order = 31 - (number of leading zeros) = index of the top set bit
	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search: drop 16, then 8, then 4 low bits whenever the
	@ divisor is at least that large, accumulating the shift in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ The divisor is now below 16.  Above 4 (that is 8 for a power of
	@ two) adds 3; otherwise 4, 2, 1 add divisor >> 1 = 2, 1, 0.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm

/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract remainder core.
 *
 *   dividend  in/out: value to reduce; holds the remainder on exit.
 *   divisor   in:     modulus.  Clobbered (shifted up during setup, then
 *                     shifted back down while subtracting).
 *   order     scratch: number of bit positions the divisor was shifted
 *                     left; order + 1 compare/subtract steps are needed.
 *   spare     scratch: only written on the ARMv5+ (clz) path.
 *
 * The callers in this file only expand it with dividend > divisor > 1 and
 * a divisor that is not a power of two.  Condition flags are clobbered.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend; order is the shift that was applied.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	@ From here on order holds (steps still to do) - 4, so a negative
	@ value means fewer than four steps are left.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ remainder already zero?
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4		@ no: account for this batch
	bge	1b				@ and loop while a full batch is left

	@ Finished if no partial batch is left (low two bits of order are
	@ clear) or if the remainder is already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left:
	@ order is -1 for three steps, -2 for two, -3 for one.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

.endm

/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3 and the condition flags as scratch.
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)

	subs	r2, r1, #1		@ r2 = divisor - 1
	moveq	pc, lr			@ divisor is 1: quotient is the dividend
	bcc	Ldiv0			@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f			@ dividend <= divisor: quotient is 0 or 1
	tst	r1, r2
	beq	12f			@ divisor is a power of two: just shift

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	mov	pc, lr

	@ Flags still come from the cmp above: eq means dividend == divisor.
11:	moveq	r0, #1
	movne	r0, #0
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	mov	pc, lr

ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = dividend modulo divisor
 *   Uses: r1, r2, r3 and the condition flags as scratch.
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * resolves the easy cases without branching:
 *   - divisor == 1 or dividend == divisor: result is 0
 *   - dividend <  divisor: result is the dividend unchanged
 *   - divisor is a power of two: result is dividend AND (divisor - 1)
 * The final movls relies on tst with a plain register operand leaving the
 * carry flag from the preceding cmp untouched.
 */
ENTRY(__umodsi3)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	movls	pc, lr

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3, ip and the condition flags as scratch.
 *
 * The magnitudes are divided as unsigned values and the sign is applied
 * afterwards.  ip = dividend EOR divisor, so its top bit is the sign of
 * the result.  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Divisor was 1 or -1.  ip EOR r0 gives back the original divisor,
	@ so mi means it was -1 and the dividend has to be negated.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Magnitude of dividend <= magnitude of divisor.  Flags come from the
	@ cmp above: lo gives 0, eq gives +1 or -1 depending on the sign in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ does not touch the flags
	rsbmi	r0, r0, #0
	mov	pc, lr

ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, carrying the sign of the dividend
 *   Uses: r1, r2, r3, ip and the condition flags as scratch.
 *
 * The magnitudes are reduced as unsigned values; ip keeps the original
 * dividend so the result can be negated at the end.  A zero divisor
 * branches to Ldiv0.  The conditional sequence in the middle resolves
 * the easy cases (divisor magnitude 1, dividend magnitude not above the
 * divisor magnitude, power-of-two divisor) and skips the macro with bls.
 */
ENTRY(__modsi3)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0				@ was the dividend negative?
	rsbmi	r0, r0, #0			@ yes: negate the remainder
	mov	pc, lr

ENDPROC(__modsi3)

/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch, plus whatever __aeabi_uidiv uses.
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then rebuilt
 * as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)

	@ Keep the operands across the call.
	@ NOTE(review): ip looks like it is pushed only to make the frame
	@ four words (16 bytes) - confirm.
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch, plus whatever __aeabi_idiv uses.
 *
 * The quotient comes from __aeabi_idiv; the remainder is then rebuilt
 * as dividend - quotient * divisor.
 */
ENTRY(__aeabi_idivmod)

	@ Keep the operands across the call.
	@ NOTE(review): ip looks like it is pushed only to make the frame
	@ four words (16 bytes) - confirm.
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_idivmod)

/*
 * Ldiv0: shared divide-by-zero path.
 *
 * The division entry points reach this label with a plain branch, so lr
 * still holds the return address of whoever called the division routine.
 * It calls __div0 (not defined in the visible part of this file) and then
 * returns 0 in r0 to that caller.
 */
Ldiv0:

	str	lr, [sp, #-8]!		@ push lr, moving sp down by 8 bytes
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc
