| /* MN10300 Optimised simple memory to memory copy, with support for overlapping |
| * regions |
| * |
| * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. |
| * Written by David Howells (dhowells@redhat.com) |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public Licence |
| * as published by the Free Software Foundation; either version |
| * 2 of the Licence, or (at your option) any later version. |
| */ |
| #include <asm/cache.h> |
| |
# Place the code that follows in the .text section and align it to an
# L1_CACHE_BYTES boundary.
# NOTE(review): L1_CACHE_BYTES is presumably supplied by <asm/cache.h> -- confirm.
| .section .text |
| .balign L1_CACHE_BYTES |
| |
| ############################################################################### |
| # |
| # void *memmove(void *dst, const void *src, size_t n) |
| # |
| ############################################################################### |
# Copy n bytes from src to dst; the two regions may overlap.
#
# Inputs and result, as read from the code below:
#   d0       = dst on entry
#   d1       = src on entry
#   (20,sp)  = n, read after d2/d3 have been pushed
#   a0       = result: the original dst (parked in e3 while copying)
#
# Strategy:
#   - dst below src (unsigned compare): tail-jump to memcpy, which per the
#     comment at the entry point works bottom up.
#   - otherwise copy top down, starting from the end of both buffers:
#       * dst, src and n all multiples of four: word copies, 32 bytes per
#         loop pass, then one optional 4-word step and a 1-3 word tail;
#       * anything else: one byte at a time (memmove_1).
#
# Working registers in the top-down path:
#   a0 = dst cursor, stepped down before each store
#   a1 = src cursor, stepped down after each load
#   d2 = byte count (biased inside the loops, see below)
#   d3 = alignment scratch, then the loop step (32 or 1)
#   d0, d1, e0, e1 = data in flight
# d2 and d3 are pushed on entry and restored by ret.  d0, d1, a1, e3 (and
# e0/e1 in the 4-word step) are overwritten and not restored.
# NOTE(review): confirm e0, e1 and e3 are scratch registers under the ABI
# this file is built for.
| .globl memmove |
| .type memmove,@function |
| memmove: |
| # fall back to memcpy if dst < src to work bottom up |
# Unsigned compare of dst (d0) against src (d1).  Nothing has been pushed or
# modified yet, so memcpy receives the arguments exactly as the caller
# passed them.
| cmp d1,d0 |
| bcs memmove_memcpy |
| |
| # work top down |
# Push d2/d3, then store the two register arguments at (12,sp) and (16,sp).
# NOTE(review): those look like the argument home slots sitting just below
# n at (20,sp) -- confirm against the ABI.
| movm [d2,d3],(sp) |
| mov d0,(12,sp) |
| mov d1,(16,sp) |
| mov (20,sp),d2 # count |
| add d0,d2,a0 # dst end |
| add d1,d2,a1 # src end |
| mov d0,e3 # the return value |
| |
| cmp +0,d2 |
| beq memmove_done # return if zero-length copy |
| |
| # see if the three parameters are all four-byte aligned |
# d3 = (dst | src | n) & 3; nonzero means at least one of the three is not a
# multiple of four, so the word path cannot be used.
| or d0,d1,d3 |
| or d2,d3 |
| and +3,d3 |
| bne memmove_1 # jump if not |
| |
| # we want to transfer as much as we can in chunks of 32 bytes |
# Point a1 at the last source word.  a0 stays at dst end because it is
# stepped down before each store.  Counts of 31 or less skip the unrolled
# loop.
| add -4,a1 |
| cmp +31,d2 |
| bls memmove_4_remainder # 4-byte aligned remainder |
| |
# Bias the count by -32 so that the borrow from the sub at the bottom of the
# loop means fewer than 32 bytes are left.  d3 holds the 32-byte step.
| add -32,d2 |
| mov +32,d3 |
| |
# Unrolled body: each load / sub_sub / store triple moves one word and steps
# both cursors down by four.  Eight triples per pass, alternating d0 and d1
# as the data register.
| memmove_4_loop: |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| mov (a1),d1 |
| sub_sub +4,a1,+4,a0 |
| mov d1,(a0) |
| |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| mov (a1),d1 |
| sub_sub +4,a1,+4,a0 |
| mov d1,(a0) |
| |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| mov (a1),d1 |
| sub_sub +4,a1,+4,a0 |
| mov d1,(a0) |
| |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| mov (a1),d1 |
| sub_sub +4,a1,+4,a0 |
| mov d1,(a0) |
| |
# Take another 32 off the biased count; keep looping while that does not
# borrow.
| sub d3,d2 |
| bcc memmove_4_loop |
| |
# Undo the bias: d2 is now the number of bytes still to copy (a multiple of
# four below 32).  Zero means the word copy is complete.
| add d3,d2 |
| beq memmove_4_no_remainder |
| |
# Reached with d2 a nonzero multiple of four below 32.
| memmove_4_remainder: |
| # cut 4-7 words down to 0-3 |
# Sixteen or more bytes left: move four words (through d0, d1, e0, e1) and
# take 16 off the count.
| cmp +16,d2 |
| bcs memmove_4_three_or_fewer_words |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| mov (a1),d1 |
| sub_sub +4,a1,+4,a0 |
| mov d1,(a0) |
| mov (a1),e0 |
| sub_sub +4,a1,+4,a0 |
| mov e0,(a0) |
| mov (a1),e1 |
| sub_sub +4,a1,+4,a0 |
| mov e1,(a0) |
| add -16,d2 |
| beq memmove_4_no_remainder |
| |
| # copy the remaining 1, 2 or 3 words |
# Here d2 is 4, 8 or 12.  Twelve falls through all three word moves, eight
# enters at memmove_4_two_words and four at memmove_4_one_word.  d2 is not
# updated any further on this ladder.
| memmove_4_three_or_fewer_words: |
| cmp +8,d2 |
| bcs memmove_4_one_word |
| beq memmove_4_two_words |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| memmove_4_two_words: |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| memmove_4_one_word: |
| mov (a1),d0 |
| sub_sub +4,a1,+4,a0 |
| mov d0,(a0) |
| |
| memmove_4_no_remainder: |
| # check we copied the correct amount |
| # TODO: REMOVE CHECK |
# The dst cursor (a0) should have walked back down to the original dst held
# in e3.  A nonzero difference falls into the break instructions below.
# d2 is used as scratch for the difference; ret restores it afterwards.
| sub e3,a0,d2 |
| beq memmove_done |
| break |
| break |
| break |
| |
# Common exit for the zero-length and word paths: put the original dst in
# a0, then ret [d2,d3],8 undoes the movm done at entry and returns.
| memmove_done: |
| mov e3,a0 |
| ret [d2,d3],8 |
| |
| # handle misaligned copying |
# Byte-at-a-time top-down copy.
#   a1 = address of the last source byte
#   d2 = n - 1, so the borrow from sub d3,d2 shows up on the pass that moves
#        the final byte
#   d3 = 1, the register step used by both sub and sub_sub
# The instructions between setlb and lcc form the loop body; lcc branches
# back to the point marked by setlb while carry is clear.
# NOTE(review): lcc presumably tests the carry left by sub d3,d2, which
# would require movbu and sub_sub to leave that flag alone -- confirm
# against the AM33 instruction manual before reordering anything here.
| memmove_1: |
| add -1,a1 |
| add -1,d2 |
| mov +1,d3 |
| setlb # setlb requires the next insns |
| # to occupy exactly 4 bytes |
| |
| sub d3,d2 |
| movbu (a1),d0 |
| sub_sub d3,a1,d3,a0 |
| movbu d0,(a0) |
| lcc |
| |
# Return the original dst.  This path does not run the length check used by
# the word path.
| mov e3,a0 |
| ret [d2,d3],8 |
| |
# Tail jump taken straight from the entry compare, before anything has been
# pushed.  memcpy is not defined in this view of the file.
| memmove_memcpy: |
| jmp memcpy |
| |
# End marker, used only to compute the size of the memmove symbol.
| memmove_end: |
| .size memmove, memmove_end-memmove |