/*
 * vineetg: May 2010
 *  -Rewrote ip_fast_csum() and csum_fold() with fast inline asm
 */

#ifndef _ASM_ARC_CHECKSUM_H | |

#define _ASM_ARC_CHECKSUM_H | |

#include <linux/in6.h> | |

/*
 * Computes the checksum of a memory block at @buff, length @len,
 * and adds in @sum (32-bit).
 *
 * Returns a 32-bit number suitable for feeding into itself
 * or into csum_tcpudp_magic().
 *
 * This function must be called with even lengths, except
 * for the last fragment, which may be odd.
 *
 * It's best to have @buff aligned on a 32-bit boundary.
 */
unsigned int csum_partial(const void *buff, int len, unsigned int sum);

/*
 * The same as csum_partial(), but copies from @src while it checksums,
 * and handles user-space pointer exceptions correctly, when needed.
 *
 * Here it is even more important to align @src and @dst on a 32-bit
 * (or even better 64-bit) boundary.
 */
unsigned int csum_partial_copy(const char *src, char *dst, int len, int sum);

/*
 * The same as csum_partial_copy(), but copies from user space.
 *
 * Here it is even more important to align @src and @dst on a 32-bit
 * (or even better 64-bit) boundary.
 *
 * NOTE(review): @csum_err presumably receives an error indication when the
 * user-space access faults -- confirm against the definition, which is not
 * part of this header.
 */
extern unsigned int csum_partial_copy_from_user(const char *src, char *dst,
						int len, int sum, int *csum_err);

/* Expands to a csum_partial_copy() call with the same four arguments. */
#define csum_partial_copy_nocheck(src, dst, len, sum)	\
	csum_partial_copy((src), (dst), (len), (sum))

/* | |

* Fold a partial checksum | |

*/ | |

static inline __sum16 csum_fold(__wsum sum) | |

{ | |

unsigned int lower, upper; | |

__sum16 folded; | |

/* The 32 bit sum is broken into two 16 bit halves which are then added | |

* We test if this results in a carry (bit 16 set), in which case it is | |

* added back. This is slightly optimal than unconditionally adding the | |

* upper bits of 16 bit sum (in anticipation of carry) back to sum | |

* | |

* Also as per the csum algorithm, after adding the carry back to sum, | |

* the carry portion needs to be discarded before doing the invert. | |

* However we leave the discard to end, when gcc for the purpose of | |

* returning a sword, anyways downconverts the word to s-word, discarding | |

* that portion | |

*/ | |

__asm__ __volatile__( | |

"bmsk %0, %3, 15 \n" // break 32 bit @sum into two 16 bit words | |

"lsr %1, %3, 16 \n" | |

"add %0, %0, %1 \n" // add them together | |

"btst %0, 16 \n" // was there a carry in 16 bit sum | |

"add.nz %0, %0, 1 \n" // if yes, add it back to sum | |

// extw needed - deferred (see below) | |

"not %2, %0 \n" // csum req a final invert of sum | |

:"=&r"(lower), "=&r" (upper), "=r" (folded) | |

:"r" (sum) | |

:"cc" | |

); | |

/* This implies extw insn, which down-conv 32-bit word to 16 bit word, | |

* for the 16 bit return semantics. It also takes care of discarding | |

* the carry portion of 16 bit sum, which we avoided purposefully | |

* in the inline asm above | |

*/ | |

return folded; | |

} | |

/* | |

* This is a version of ip_compute_csum() optimized for IP headers, | |

* which always checksum on 4 octet boundaries. | |

*/ | |

static inline __sum16 ip_fast_csum(void *iph, unsigned int ihl) | |

{ | |

void *ptr = iph; | |

unsigned int sum, tmp; | |

__asm__ __volatile__( | |

"ld.ab %0, [%2, 4] \n\t" | |

"sub.f lp_count, %3, 1 \n\t" | |

"lpnz 1f \n\t" | |

"ld.ab %1, [%2, 4] \n\t" | |

"adc.f %0, %0, %1 \n\t" | |

"1: \n\t" | |

"adc %0, %0, 0 \n\t" | |

:"=&r"(sum), "=r"(tmp), "+r"(ptr) | |

:"r"(ihl) | |

:"cc" | |

); | |

return csum_fold(sum); | |

} | |

static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, | |

unsigned long daddr, | |

unsigned short len, | |

unsigned short proto, | |

unsigned int sum) | |

{ | |

__asm__ __volatile__( | |

"add.f %0, %0, %1 \n" | |

"adc.f %0, %0, %2 \n" | |

"adc.f %0, %0, %3 \n" | |

"adc.f %0, %0, %4 \n" | |

"adc %0, %0, 0 \n" | |

: "=r" (sum) | |

: "r" (saddr), "r" (daddr), "r" (ntohs(len) << 16), | |

"r" (proto << 8), "0" (sum) | |

: "cc" | |

); | |

return sum; | |

} | |

/*
 * Computes the checksum of the TCP/UDP pseudo-header.
 * Returns a 16-bit checksum, already complemented.
 *
 * The arguments are handed unchanged to csum_tcpudp_nofold(); its 32-bit
 * result is then reduced by csum_fold().
 */
static inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
						   unsigned long daddr,
						   unsigned short len,
						   unsigned short proto,
						   unsigned int sum)
{
	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}

/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably returns the checksum of the @len bytes at
 * @buff -- confirm against the definition.
 */
extern unsigned short ip_compute_csum(const unsigned char *buff, int len);

#endif /* _ASM_ARC_CHECKSUM_H */ |