blob: e795aa8b918a83c039aac84c49296a052206abd1 [file] [log] [blame]
/* Code was copied from the ARC GCC toolchain, uClibc package (GPL), memcpy.S.
* The only change made is that accesses to hardware bypass the cache.
* memcpy-700.S was not used because it appears that it can load the same
* memory address twice, which is not acceptable for this use case.
*/
#include "newlib_asm.h"
/*
* void* io_copy_from(void *dest, const void *src, size_t n);
* Accesses to 'src' bypass the cache.
* The function is suitable for copying data from an IO-mapped device.
* The function:
* - uses byte- and word-length operations to load/store data
* - uses ascending addresses when copying (does not walk back)
* - uses only load operations on 'src'
* - uses only store operations on 'dest'
*/
ENTRY(io_copy_from)
; Copy n bytes from src to dest; every load from src carries the .di
; (cache bypass) flag, stores to dest are ordinary.
;
; In:      r0 = dest, r1 = src, r2 = n (byte count)
; Out:     r0 is never written in this routine, so it still holds dest
; Scratch: r1, r3 (running dest pointer), r4-r7, lp_count, flags
;
; Strategy: if dest or src is not 4-byte aligned, copy byte by byte.
; Otherwise copy 16-byte groups in a hardware loop, then an optional
; 8-byte chunk, an optional 4-byte chunk and finally n % 4 single bytes.
; Addresses are only ever walked upwards.
;;; if count of copies is zero
mov.f lp_count,r2 ; lp_count = n (used as-is by the byte-wise path); Z set when n is zero
jz.d [blink] ; n is zero: nothing to copy, return
or r4,r0,r1 ; (delay slot) merge both addresses for one alignment test
and.f 0,r4,3 ; check if 4 byte aligned
#ifndef __A4__
bnz.d @copy_bytewise_from ; dest or src misaligned: take the byte loop
lsr r4,r2,3 ; (delay slot) r4 = n / 8, i.e. number of 8-byte units
#else
bnz.d @copy_bytewise_from ; dest or src misaligned: take the byte loop
lsr r4,r2 ; (delay slot) first of three one-bit shifts,
lsr r4,r4 ; which together give r4 = n / 8
lsr r4,r4
#endif
;; Now both pointers are
;; 4 bytes aligned.
;;
lsr.f lp_count,r4 ; lp_count = n / 16; carry = bit 3 of n; Z set when no 16-byte group
sub r1,r1,4 ; bias src by -4 because each word access below pre-increments by 4
sub r3,r0,4 ; r3 = biased running dest pointer (r0 itself is left alone)
lpnz @copy16bytes_from ; hardware loop up to the label; skipped entirely when Z is set
ld.a.di r4,[r1,4] ; four consecutive cache-bypassing word loads from src
ld.a.di r5,[r1,4]
ld.a.di r6,[r1,4]
ld.a.di r7,[r1,4]
st.a r4,[r3,4] ; followed by four ordinary word stores to dest
st.a r5,[r3,4]
st.a r6,[r3,4]
st.a r7,[r3,4]
copy16bytes_from:
bcs.d @remainder8bytes_from ; carry left by the lsr.f above: an 8-byte chunk is pending
and.f 0,r2,4 ; (delay slot, always executed) Z clear when a 4-byte chunk is pending
#ifdef __A4__
nop
#endif
bnz.d @copy4bytes_from ; no 8-byte chunk, but a 4-byte chunk is pending
memcpytill1_from:
; The next instruction is both the delay slot of the bnz.d above and the
; entry point used by the 8-byte path when no 4-byte chunk follows.
and.f lp_count, r2, 3 ; lp_count = n % 4 trailing bytes; Z set when there are none
memcpy1_from:
add r1,r1,3 ; step from the last copied word to its last byte (byte loop pre-increments by 1)
jz [blink] ; no trailing bytes (Z from the and.f on lp_count): done
add r3,r3,3 ; same adjustment for the dest pointer
lp @end_loop_from ; hardware loop over the trailing bytes
ldb.a.di r4,[r1,1] ; cache-bypassing byte load from src
stb.a r4,[r3,1] ; ordinary byte store to dest
end_loop_from:
j [blink]
remainder8bytes_from:
; One pending 8-byte chunk: two word loads first, then two word stores.
ld.a.di r4,[r1,4]
ld.a.di r5,[r1,4]
st.a r4,[r3,4]
bz.d @memcpytill1_from ; Z from the earlier test of bit 2 of n: no 4-byte chunk follows
st.a r5,[r3,4] ; (delay slot) second store of the 8-byte chunk
and.f lp_count,r2,3 ; fall through to the 4-byte chunk: set up n % 4 and Z beforehand
copy4bytes_from:
; One pending 4-byte chunk. Nothing here changes the flags, so the Z
; computed together with lp_count is still valid at memcpy1_from.
ld.a.di r4,[r1,4]
b.d @memcpy1_from
st.a r4,[r3,4] ; (delay slot) store the word before the trailing-byte stage
copy_bytewise_from:
; Misaligned case: lp_count still holds n from the very first instruction.
sub r1,r1,1 ; bias both pointers by -1 for the pre-incrementing byte accesses
sub r3,r0,1
lp @.end_bytes_from ; hardware loop, one byte per iteration
ldb.a.di r4,[r1,1] ; cache-bypassing byte load from src
stb.a r4,[r3,1] ; ordinary byte store to dest
.end_bytes_from:
j [blink]
ENDFUNC(io_copy_from)
/*
* void* io_copy_to(void *dest, const void *src, size_t n);
* Similar to io_copy_from, but here accesses to 'dest' bypass the cache.
*/
ENTRY(io_copy_to)
; Copy n bytes from src to dest; every store to dest carries the .di
; (cache bypass) flag, loads from src are ordinary.
;
; In:      r0 = dest, r1 = src, r2 = n (byte count)
; Out:     r0 is never written in this routine, so it still holds dest
; Scratch: r1, r3 (running dest pointer), r4-r7, lp_count, flags
;
; Strategy: if dest or src is not 4-byte aligned, copy byte by byte.
; Otherwise copy 16-byte groups in a hardware loop, then an optional
; 8-byte chunk, an optional 4-byte chunk and finally n % 4 single bytes.
; Addresses are only ever walked upwards.
;;; if count of copies is zero
mov.f lp_count,r2 ; lp_count = n (used as-is by the byte-wise path); Z set when n is zero
jz.d [blink] ; n is zero: nothing to copy, return
or r4,r0,r1 ; (delay slot) merge both addresses for one alignment test
and.f 0,r4,3 ; check if 4 byte aligned
#ifndef __A4__
bnz.d @copy_bytewise_to ; dest or src misaligned: take the byte loop
lsr r4,r2,3 ; (delay slot) r4 = n / 8, i.e. number of 8-byte units
#else
bnz.d @copy_bytewise_to ; dest or src misaligned: take the byte loop
lsr r4,r2 ; (delay slot) first of three one-bit shifts,
lsr r4,r4 ; which together give r4 = n / 8
lsr r4,r4
#endif
;; Now both pointers are
;; 4 bytes aligned.
;;
lsr.f lp_count,r4 ; lp_count = n / 16; carry = bit 3 of n; Z set when no 16-byte group
sub r1,r1,4 ; bias src by -4 because each word access below pre-increments by 4
sub r3,r0,4 ; r3 = biased running dest pointer (r0 itself is left alone)
lpnz @copy16bytes_to ; hardware loop up to the label; skipped entirely when Z is set
ld.a r4,[r1,4] ; four consecutive ordinary word loads from src
ld.a r5,[r1,4]
ld.a r6,[r1,4]
ld.a r7,[r1,4]
st.a.di r4,[r3,4] ; followed by four cache-bypassing word stores to dest
st.a.di r5,[r3,4]
st.a.di r6,[r3,4]
st.a.di r7,[r3,4]
copy16bytes_to:
bcs.d @remainder8bytes_to ; carry left by the lsr.f above: an 8-byte chunk is pending
and.f 0,r2,4 ; (delay slot, always executed) Z clear when a 4-byte chunk is pending
#ifdef __A4__
nop
#endif
bnz.d @copy4bytes_to ; no 8-byte chunk, but a 4-byte chunk is pending
memcpytill1_to:
; The next instruction is both the delay slot of the bnz.d above and the
; entry point used by the 8-byte path when no 4-byte chunk follows.
and.f lp_count, r2, 3 ; lp_count = n % 4 trailing bytes; Z set when there are none
memcpy1_to:
add r1,r1,3 ; step from the last copied word to its last byte (byte loop pre-increments by 1)
jz [blink] ; no trailing bytes (Z from the and.f on lp_count): done
add r3,r3,3 ; same adjustment for the dest pointer
lp @end_loop_to ; hardware loop over the trailing bytes
ldb.a r4,[r1,1] ; ordinary byte load from src
stb.a.di r4,[r3,1] ; cache-bypassing byte store to dest
end_loop_to:
j [blink]
remainder8bytes_to:
; One pending 8-byte chunk: two word loads first, then two word stores.
ld.a r4,[r1,4]
ld.a r5,[r1,4]
st.a.di r4,[r3,4]
bz.d @memcpytill1_to ; Z from the earlier test of bit 2 of n: no 4-byte chunk follows
st.a.di r5,[r3,4] ; (delay slot) second store of the 8-byte chunk
and.f lp_count,r2,3 ; fall through to the 4-byte chunk: set up n % 4 and Z beforehand
copy4bytes_to:
; One pending 4-byte chunk. Nothing here changes the flags, so the Z
; computed together with lp_count is still valid at memcpy1_to.
ld.a r4,[r1,4]
b.d @memcpy1_to
st.a.di r4,[r3,4] ; (delay slot) store the word before the trailing-byte stage
copy_bytewise_to:
; Misaligned case: lp_count still holds n from the very first instruction.
sub r1,r1,1 ; bias both pointers by -1 for the pre-incrementing byte accesses
sub r3,r0,1
lp @.end_bytes_to ; hardware loop, one byte per iteration
ldb.a r4,[r1,1] ; ordinary byte load from src
stb.a.di r4,[r3,1] ; cache-bypassing byte store to dest
.end_bytes_to:
j [blink]
ENDFUNC(io_copy_to)