/* Copyright (C) 2007 ARC International (UK) LTD */
/* Code was copied from the ARC GCC toolchain, uClibc package (GPL). */
#include "newlib_asm.h"
#ifndef __A4__
/* Mostly optimized for ARC700, but not bad for ARC600 either.  */
/* This memcpy implementation does not support objects of 1GB or larger -
   the check for alignment does not work then.  */
/* We assume that most sources and destinations are aligned, and that
   lengths, too, are usually a multiple of four, although to a lesser
   extent.  */
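/* As a reference, here is a minimal C sketch of the dispatch test done
   by the or/asl_s/brls.d sequence below (assuming ARC's 32-bit
   pointers; the function name is hypothetical, not part of this file):

       #include <stdint.h>

       static int use_bytewise (uint32_t dst, uint32_t src, uint32_t len)
       {
         uint32_t r3 = (dst | src) << 30;
         return len <= r3;
       }

   (dst | src) << 30 is nonzero exactly when either pointer is not
   4-byte aligned, and is then at least 0x40000000 (1GB), so any
   shorter copy takes the bytewise path.  A length of 1GB or more can
   exceed r3 even for unaligned pointers, which is why such objects
   are unsupported.  An aligned copy of length 0 also takes the
   bytewise path, where jcs returns immediately.  */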
ENTRY(memcpy)
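/* On entry r0 = dst (also the return value), r1 = src, r2 = length.
   The copy works through r5 so that r0 reaches the caller unchanged.  */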
        or      r3,r0,r1
        asl_s   r3,r3,30
        mov_s   r5,r0
        brls.d  r2,r3,.Lcopy_bytewise
        sub.f   r3,r2,1
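/* Word-wise path.  r3 = length - 1; the borrow from this subtraction
   (set only for length == 0) is still live for the jcs on the bytewise
   path.  lp_count is set to (length - 1) / 8 iterations of the 8-byte
   loop, bit 2 of r3 decides whether one extra word is copied up front,
   and bmsk_s reduces r2 to the trailing byte count (length & 3).  */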
        ld_s    r12,[r1,0]
        asr.f   lp_count,r3,3
        bbit0.d r3,2,.Lnox4
        bmsk_s  r2,r2,1
        st.ab   r12,[r5,4]
        ld.a    r12,[r1,4]
.Lnox4:
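/* Zero-overhead loop: eight bytes per iteration, software-pipelined
   through r12 and r3 (r12 is already loaded).  st.ab stores through
   r5 and then advances it; ld.a advances r1 before loading.  */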
        lppnz   .Lendloop
        ld_s    r3,[r1,4]
        st.ab   r12,[r5,4]
        ld.a    r12,[r1,8]
        st.ab   r3,[r5,4]
.Lendloop:
        breq_l  r2,0,.Last_store
        ld      r3,[r5,0]
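/* Merge the last 1-3 source bytes (in r12) into the destination word
   just loaded into r3: ((a ^ b) & mask) ^ b yields a where the mask
   is set and b elsewhere, built from bmsk and two xors below.  */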
#ifdef __LITTLE_ENDIAN__
        add3    r2,-1,r2        ; uses long immediate
        xor_s   r12,r12,r3
        bmsk    r12,r12,r2
        xor_s   r12,r12,r3
#else /* BIG ENDIAN */
        sub3    r2,31,r2        ; uses long immediate
        xor_s   r3,r3,r12
        bmsk    r3,r3,r2
        xor_s   r12,r12,r3
#endif /* ENDIAN */
.Last_store:
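/* The (possibly merged) last word is stored in the delay slot of the
   return jump.  */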
        j_s.d   [blink]
        st      r12,[r5,0]

        .balign 4
.Lcopy_bytewise:
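/* Bytewise path, also reached for length == 0: the borrow from
   sub.f r3,r2,1 is still set then and jcs returns at once.  lp_count
   is set to (length - 1) / 2 two-byte iterations; one leading byte is
   copied first when the length is even.  */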
        jcs     [blink]
        ldb_s   r12,[r1,0]
        lsr.f   lp_count,r3
        bcc_s   .Lnox1
        stb.ab  r12,[r5,1]
        ldb.a   r12,[r1,1]
.Lnox1:
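/* Two bytes per iteration, pipelined through r12 and r3, with a final
   single-byte store in the delay slot of the return.  */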
        lppnz   .Lendbloop
        ldb_s   r3,[r1,1]
        stb.ab  r12,[r5,1]
        ldb.a   r12,[r1,2]
        stb.ab  r3,[r5,1]
.Lendbloop:
        j_s.d   [blink]
        stb     r12,[r5,0]
ENDFUNC(memcpy)
#endif /* !__A4__ */