blob: d22b96bb878e2499e4d1c9c63b7b22b585981867 [file] [log] [blame]
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/* This is optimized primarily for the ARC700.
It would be possible to speed up the loops by one cycle / word or
one cycle / byte, respectively, by forcing double source 1 alignment,
unrolling by a factor of two, and speculatively loading the second word /
byte of source 1; however, that would increase the overhead for loop
setup / finish, and strcmp might often terminate early. */
#include <asm/linkage.h>
/*
 * strcmp - compare two zero-terminated byte strings.
 *
 * Register use, as read from the code below:
 *   r0     in:  pointer into the first string (s1);  out: result
 *   r1     in:  pointer into the second string (s2)
 *          (assumed to be the usual strcmp(s1, s2) arguments - inferred
 *          from the symbol name and the loads through r0 / r1)
 *   r2,r3  current word or byte loaded from s1 and s2
 *   r4     zero-byte indicator for the current s1 word
 *   r5     0x80808080 (r12 rotated right by one bit)
 *   r12    0x01010101
 *   blink  return address
 * r0-r5 and r12 are overwritten; the stack is not touched.
 *
 * Result in r0: 0 when the strings are equal, a positive value when the
 * first differing byte is higher in s1, a negative value when it is lower.
 * Bytes are compared as unsigned values.  The word paths return 1 or a
 * value with bit 31 set; the byte path returns the plain byte difference.
 *
 * Strategy: when both pointers are 4-byte aligned, compare one word per
 * iteration and use a bit trick to spot a zero byte in the s1 word;
 * otherwise compare one byte per iteration.
 *
 * NOTE(review): the word paths treat the least significant byte of a
 * loaded word as the earliest string byte, i.e. they assume little-endian
 * byte order.  No big-endian variant is visible in this file - confirm
 * that this file is never built for a big-endian configuration.
 */
ARC_ENTRY strcmp
; Take the byte loop unless both pointers are 4-byte aligned.
or r2,r0,r1 ; merge the address bits of both pointers
bmsk_s r2,r2,1 ; keep bits 1:0 only
brne r2,0,.Lcharloop ; at least one pointer is not word aligned
; Constants for the zero-byte test.
mov_s r12,0x01010101
ror r5,r12 ; r5 = 0x80808080
.Lwordloop:
ld.ab r2,[r0,4] ; r2 = next s1 word, then r0 += 4
ld.ab r3,[r1,4] ; r3 = next s2 word, then r1 += 4
nop_s ; NOTE(review): looks like a filler between the loads and their first use - confirm
; r4 = (r2 - 0x01010101) & ~r2 & 0x80808080, which is non-zero exactly
; when the s1 word contains a zero byte.
sub r4,r2,r12
bic r4,r4,r2
and r4,r4,r5
brne r4,0,.Lfound0 ; the s1 word holds the terminator
breq r2,r3,.Lwordloop ; equal words without terminator: keep going
; The words differ and the s1 word has no zero byte.
xor r0,r2,r3 ; mask for difference
sub_s r1,r0,1
bic_s r0,r0,r1 ; mask for least significant difference bit
sub r1,r5,r0
xor r0,r5,r1 ; mask for least significant difference byte
and_s r2,r2,r0 ; apply the mask to the s1 word
and_s r3,r3,r0 ; apply the mask to the s2 word
cmp_s r2,r3 ; unsigned order of the masked values decides the sign
mov_s r0,1 ; result when the s1 byte is higher
j_s.d [blink] ; return; the next instruction sits in the delay slot
bset.lo r0,r0,31 ; s1 byte is lower: set bit 31 so the result is negative
.balign 4
; The s1 word contains a zero byte; the words may or may not differ.
.Lfound0:
xor r0,r2,r3 ; mask for difference
or r0,r0,r4 ; or in zero indicator
sub_s r1,r0,1
bic_s r0,r0,r1 ; mask for least significant difference bit
sub r1,r5,r0
xor r0,r5,r1 ; mask for least significant difference byte
and_s r2,r2,r0 ; apply the mask to the s1 word
and_s r3,r3,r0 ; apply the mask to the s2 word
sub.f r0,r2,r3 ; r0 = 0 when equal up to the terminator; sets the flags
mov.hi r0,1 ; s1 byte is higher: return 1
j_s.d [blink] ; return; the next instruction sits in the delay slot
bset.lo r0,r0,31 ; s1 byte is lower: set bit 31 so the result is negative
.balign 4
; Byte-at-a-time path, used when the pointers are not both word aligned.
.Lcharloop:
ldb.ab r2,[r0,1] ; r2 = next s1 byte, then r0 += 1
ldb.ab r3,[r1,1] ; r3 = next s2 byte, then r1 += 1
nop_s ; NOTE(review): looks like a filler between the loads and their first use - confirm
breq r2,0,.Lcmpend ; reached the end of s1
breq r2,r3,.Lcharloop ; bytes are equal: keep going
.Lcmpend:
j_s.d [blink] ; return; the next instruction sits in the delay slot
sub r0,r2,r3 ; result = s1 byte - s2 byte
ARC_EXIT strcmp