| /** |
| *** (C) Copyright 2008-2010 Marvell International Ltd. |
| *** All Rights Reserved |
| **/ |
| /** |
| *** MARVELL INTERNATIONAL LTD., ON BEHALF OF ITSELF AND ITS AFFILIATES |
| *** (COLLECTIVELY, "MARVELL") AND ITS SUPPLIERS MAKE NO WARRANTY OF |
| *** ANY KIND WITH REGARD TO THIS MATERIAL, INCLUDING, BUT NOT LIMITED TO, |
| *** THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
| *** PARTICULAR PURPOSE. MARVELL AND ITS SUPPLIERS ASSUME NO RESPONSIBILITY |
| *** FOR ANY ERRORS THAT MAY APPEAR IN THIS DOCUMENT. MARVELL AND ITS |
| *** SUPPLIERS MAKE NO COMMITMENT TO UPDATE NOR TO KEEP CURRENT THE |
| *** INFORMATION CONTAINED IN THIS DOCUMENT. |
| **/ |
| |
| /** |
| *** Marvell GPL License Option |
| *** |
| *** If you received this File from Marvell, you may opt to use, redistribute and/or |
| *** modify this File in accordance with the terms and conditions of the General |
| *** Public License Version 2, June 1991 (the "GPL License"), a copy of which is |
| *** available along with the File in the license.txt file or by writing to the Free |
| *** Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 or |
| *** on the worldwide web at http://www.gnu.org/licenses/gpl.txt. |
| *** |
| *** THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE IMPLIED |
| *** WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE ARE EXPRESSLY |
| *** DISCLAIMED. The GPL License provides additional details about this warranty |
| *** disclaimer. |
| **/ |
| |
| /* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published |
| by the Free Software Foundation; either version 2, or (at your |
| option) any later version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
| License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING. If not, write to the Free |
| Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301, USA. */ |
| |
| /* As a special exception, if you include this header file into source |
| files compiled by GCC, this header file does not by itself cause |
| the resulting executable to be covered by the GNU General Public |
| License. This exception does not however invalidate any other |
| reasons why the executable file might be covered by the GNU General |
| Public License. */ |
| /* |
| * Definition of a C++ class interface to WMMX(TM) instruction intrinsics. |
| * |
| */ |
| |
| #ifndef MMCLASS_H_INCLUDED |
| #define MMCLASS_H_INCLUDED |
| |
| #if !defined __cplusplus |
| #error This file can only be used in C++ compilations. |
| #endif /* ! defined __cplusplus */ |
| |
| #include <mmintrin.h> |
| |
| class I64vec1; |
| class I32vec2; |
| class Is32vec2; |
| class Iu32vec2; |
| class I16vec4; |
| class Is16vec4; |
| class Iu16vec4; |
| class I8vec8; |
| class Is8vec8; |
| class Iu8vec8; |
| |
| /* 64-bit all-ones mask used to build the !=, >= and <= operators below. */ |
| #define _wmmx_all_ones ((__m64)0xffffffffffffffffULL) |
| |
| |
| /* |
| * Base wrapper around the raw 64-bit __m64 type shared by the vector classes below. |
| */ |
| class M64 |
| { |
| protected: |
| __m64 __m ; |
| |
| public: |
| M64() { __m = _mm_setzero_si64(); } |
| M64(__m64 mm) { __m = mm; } |
| M64(__int64 mm) { __m = _mm_set_pi32((int)(mm >> 32), (int)mm); } |
| M64(int i) { __m = _mm_cvtsi32_si64(i); } |
| |
| operator __m64() const { return __m; } |
| |
| }; |
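| /* |
| * Usage sketch (illustrative only, assuming a toolchain whose <mmintrin.h> |
| * provides the WMMX forms of these intrinsics): |
| * |
| *   M64 zero;                  // all 64 bits cleared |
| *   M64 low(42);               // low 32 bits set via _mm_cvtsi32_si64 |
| *   __m64 raw = low;           // implicit conversion back to the raw type |
| */ |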
| |
| |
| /* |
| * 1 element, 64 bits wide |
| */ |
| |
| class I64vec1 : public M64 |
| { |
| public: |
| I64vec1() { } |
| explicit I64vec1(__m64 mm) : M64(mm) { } |
| explicit I64vec1(int i) : M64(i) { } |
| explicit I64vec1(__int64 mm) : M64(mm) { } |
| |
| I64vec1& operator= (const M64 &__m1) { return *this = (I64vec1) __m1; } |
| I64vec1& operator&=(const I64vec1 &__m1) { return *this = (I64vec1) _mm_and_si64(__m,__m1); } |
| I64vec1& operator|=(const I64vec1 &__m1) { return *this = (I64vec1) _mm_or_si64(__m,__m1); } |
| I64vec1& operator^=(const I64vec1 &__m1) { return *this = (I64vec1) _mm_xor_si64(__m,__m1); } |
| |
| /* Shift Logical Operations */ |
| I64vec1 operator<<(const I64vec1 &__m1) { return (I64vec1) _mm_sll_si64(__m, __m1); } |
| I64vec1 operator<<(int count) { return (I64vec1) _mm_slli_si64(__m, count); } |
| I64vec1& operator<<=(const I64vec1 &__m1) { return *this = (I64vec1) _mm_sll_si64(__m, __m1); } |
| I64vec1& operator<<=(int count) { return *this = (I64vec1) _mm_slli_si64(__m, count); } |
| }; |
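| /* |
| * Sketch (illustrative only): the single 64-bit element exposes bitwise |
| * operations and left shifts on the whole quadword. |
| * |
| *   I64vec1 v(1); |
| *   v <<= 8;                   // whole 64-bit value shifted left by 8 |
| */ |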
| |
| |
| /* |
| * 2 elements, each 32 bits wide |
| */ |
| |
| |
| class I32vec2 : public M64 |
| { |
| public: |
| I32vec2() { } |
| I32vec2(int i0, int i1) { __m = _mm_set_pi32(i0, i1); } |
| explicit I32vec2(__m64 mm) : M64(mm) { } |
| explicit I32vec2(int i) : M64 (i) { } |
| explicit I32vec2(__int64 i): M64(i) {} |
| |
| I32vec2& operator= (const M64 &__m1) { return *this = (I32vec2) __m1; } |
| |
| I32vec2& operator&=(const I32vec2 &__m1) { return *this = (I32vec2) _mm_and_si64(__m,__m1); } |
| I32vec2& operator|=(const I32vec2 &__m1) { return *this = (I32vec2) _mm_or_si64(__m,__m1); } |
| I32vec2& operator^=(const I32vec2 &__m1) { return *this = (I32vec2) _mm_xor_si64(__m,__m1); } |
| |
| I32vec2& operator +=(const I32vec2 &__m1) { return *this = (I32vec2) _mm_add_pi32(__m,__m1); } |
| I32vec2& operator -=(const I32vec2 &__m1) { return *this = (I32vec2) _mm_sub_pi32(__m,__m1); } |
| |
| I32vec2& operator *=(const I32vec2 &__m1) { return *this = (I32vec2)_mm_mullo_pi32(__m,__m1); } |
| |
| I32vec2 operator<<(const I32vec2 &__m1) { return (I32vec2) _mm_sll_pi32(__m,__m1); } |
| I32vec2 operator<<(int count) { return (I32vec2) _mm_slli_pi32(__m,count); } |
| I32vec2& operator<<=(const I32vec2 &__m1) { return *this = (I32vec2) _mm_sll_pi32(__m,__m1); } |
| I32vec2& operator<<=(int count) { return *this = (I32vec2) _mm_slli_pi32(__m,count); } |
| }; |
| |
| class Is32vec2 : public M64 |
| { |
| public: |
| Is32vec2() { } |
| Is32vec2(signed int i0, signed int i1) { __m = _mm_set_pi32(i0, i1); } |
| explicit Is32vec2(__m64 mm) : M64(mm) { } |
| explicit Is32vec2(int i) : M64 (i) {} |
| explicit Is32vec2(__int64 i): M64(i) {} |
| |
| Is32vec2& operator= (const M64 &__m1) { return *this = (Is32vec2) __m1; } |
| |
| Is32vec2& operator&=(const Is32vec2 &__m1) { return *this = (Is32vec2) _mm_and_si64(__m,__m1); } |
| Is32vec2& operator|=(const Is32vec2 &__m1) { return *this = (Is32vec2) _mm_or_si64(__m,__m1); } |
| Is32vec2& operator^=(const Is32vec2 &__m1) { return *this = (Is32vec2) _mm_xor_si64(__m,__m1); } |
| |
| Is32vec2& operator +=(const Is32vec2 &__m1) { return *this = (Is32vec2) _mm_add_pi32(__m,__m1); } |
| Is32vec2& operator -=(const Is32vec2 &__m1) { return *this = (Is32vec2) _mm_sub_pi32(__m,__m1); } |
| |
| Is32vec2& operator *=(const Is32vec2 &__m1) { return *this = (Is32vec2)_mm_mullo_pi32(__m,__m1); } |
| |
| |
| Is32vec2 operator<<(const Is32vec2 &__m1) { return (Is32vec2) _mm_sll_pi32(__m,__m1); } |
| Is32vec2 operator<<(int count) { return (Is32vec2) _mm_slli_pi32(__m,count); } |
| Is32vec2& operator<<=(const Is32vec2 &__m1) { return *this = (Is32vec2) _mm_sll_pi32(__m,__m1); } |
| Is32vec2& operator<<=(int count) { return *this = (Is32vec2) _mm_slli_pi32(__m,count); } |
| Is32vec2 operator>>(const Is32vec2 &__m1) { return (Is32vec2) _mm_sra_pi32(__m, __m1); } |
| Is32vec2 operator>>(int count) { return (Is32vec2) _mm_srai_pi32(__m, count); } |
| Is32vec2& operator>>=(const Is32vec2 &__m1) { return *this = (Is32vec2) _mm_sra_pi32(__m, __m1); } |
| Is32vec2& operator>>=(int count) { return *this = (Is32vec2) _mm_srai_pi32(__m, count); } |
| |
| }; |
| |
| class Iu32vec2 : public M64 |
| { |
| public: |
| Iu32vec2() { } |
| Iu32vec2(unsigned int i0, unsigned int i1) { __m = _mm_set_pi32(i0, i1); } |
| explicit Iu32vec2(__m64 mm) : M64(mm) { } |
| explicit Iu32vec2(int i) : M64 (i) { } |
| explicit Iu32vec2(__int64 i) : M64 (i) { } |
| |
| Iu32vec2& operator= (const M64 &__m1) { return *this = (Iu32vec2) __m1; } |
| |
| Iu32vec2& operator&=(const Iu32vec2 &__m1) { return *this = (Iu32vec2) _mm_and_si64(__m,__m1); } |
| Iu32vec2& operator|=(const Iu32vec2 &__m1) { return *this = (Iu32vec2) _mm_or_si64(__m,__m1); } |
| Iu32vec2& operator^=(const Iu32vec2 &__m1) { return *this = (Iu32vec2) _mm_xor_si64(__m,__m1); } |
| |
| Iu32vec2& operator +=(const Iu32vec2 &__m1) { return *this = (Iu32vec2) _mm_add_pi32(__m,__m1); } |
| Iu32vec2& operator -=(const Iu32vec2 &__m1) { return *this = (Iu32vec2) _mm_sub_pi32(__m,__m1); } |
| |
| Iu32vec2& operator *=(const Iu32vec2 &__m1) { return *this = (Iu32vec2)_mm_mullo_pi32(__m,__m1); } |
| |
| Iu32vec2 operator<<(const Iu32vec2 &__m1) { return (Iu32vec2) _mm_sll_pi32(__m,__m1); } |
| Iu32vec2 operator<<(int count) { return (Iu32vec2) _mm_slli_pi32(__m,count); } |
| Iu32vec2& operator<<=(const Iu32vec2 &__m1) { return *this = (Iu32vec2) _mm_sll_pi32(__m,__m1); } |
| Iu32vec2& operator<<=(int count) { return *this = (Iu32vec2) _mm_slli_pi32(__m,count); } |
| Iu32vec2 operator>>(const Iu32vec2 &__m1) { return (Iu32vec2) _mm_srl_pi32(__m,__m1); } |
| Iu32vec2 operator>>(int count) { return (Iu32vec2) _mm_srli_pi32(__m,count); } |
| Iu32vec2& operator>>=(const Iu32vec2 &__m1) { return *this = (Iu32vec2) _mm_srl_pi32(__m,__m1); } |
| Iu32vec2& operator>>=(int count) { return *this = (Iu32vec2) _mm_srli_pi32(__m,count); } |
| |
| }; |
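| /* |
| * Right shifts are only defined on the sign-qualified 32-bit types, since the |
| * element interpretation selects arithmetic versus logical shifting. |
| * Sketch (illustrative only): |
| * |
| *   Is32vec2 s(-8, -8); |
| *   s >>= 1;                   // arithmetic shift: each element becomes -4 |
| *   Iu32vec2 u(8u, 8u); |
| *   u >>= 1;                   // logical shift: each element becomes 4 |
| */ |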
| |
| /* |
| * 4 elements, each 16 bits wide |
| */ |
| |
| class I16vec4 : public M64 |
| { |
| public: |
| I16vec4() { } |
| I16vec4(short i0, short i1, short i2, short i3) { __m = _mm_set_pi16(i0, i1, i2, i3); } |
| explicit I16vec4(__m64 mm) : M64(mm) { } |
| explicit I16vec4(__int64 i) : M64 (i) { } |
| explicit I16vec4(int i) : M64 (i) { } |
| |
| I16vec4& operator= (const M64 &__m1) { return *this = (I16vec4) __m1; } |
| |
| I16vec4& operator&=(const I16vec4 &__m1) { return *this = (I16vec4) _mm_and_si64(__m,__m1); } |
| I16vec4& operator|=(const I16vec4 &__m1) { return *this = (I16vec4) _mm_or_si64(__m,__m1); } |
| I16vec4& operator^=(const I16vec4 &__m1) { return *this = (I16vec4) _mm_xor_si64(__m,__m1); } |
| |
| I16vec4& operator +=(const I16vec4 &__m1) { return *this = (I16vec4)_mm_add_pi16(__m,__m1); } |
| I16vec4& operator -=(const I16vec4 &__m1) { return *this = (I16vec4)_mm_sub_pi16(__m,__m1); } |
| I16vec4& operator *=(const I16vec4 &__m1) { return *this = (I16vec4)_mm_mullo_pi16(__m,__m1); } |
| |
| I16vec4 operator<<(const I16vec4 &__m1) { return (I16vec4) _mm_sll_pi16(__m,__m1); } |
| I16vec4 operator<<(int count) { return (I16vec4) _mm_slli_pi16(__m,count); } |
| I16vec4& operator<<=(const I16vec4 &__m1) { return *this = (I16vec4)_mm_sll_pi16(__m,__m1); } |
| I16vec4& operator<<=(int count) { return *this = (I16vec4)_mm_slli_pi16(__m,count); } |
| |
| }; |
| |
| class Is16vec4 : public M64 |
| { |
| public: |
| Is16vec4() { } |
| Is16vec4(short i0, short i1, short i2, short i3){ __m = _mm_set_pi16(i0, i1, i2, i3);} |
| explicit Is16vec4(__m64 mm) : M64(mm) { } |
| explicit Is16vec4(__int64 i) : M64 (i) { } |
| explicit Is16vec4(int i) : M64 (i) { } |
| |
| Is16vec4& operator= (const M64 &__m1) { return *this = (Is16vec4) __m1; } |
| |
| Is16vec4& operator&=(const Is16vec4 &__m1) { return *this = (Is16vec4) _mm_and_si64(__m,__m1); } |
| Is16vec4& operator|=(const Is16vec4 &__m1) { return *this = (Is16vec4) _mm_or_si64(__m,__m1); } |
| Is16vec4& operator^=(const Is16vec4 &__m1) { return *this = (Is16vec4) _mm_xor_si64(__m,__m1); } |
| |
| Is16vec4& operator +=(const Is16vec4 &__m1) { return *this = (Is16vec4)_mm_add_pi16(__m,__m1); } |
| Is16vec4& operator -=(const Is16vec4 &__m1) { return *this = (Is16vec4)_mm_sub_pi16(__m,__m1); } |
| Is16vec4& operator *=(const Is16vec4 &__m1) { return *this = (Is16vec4)_mm_mullo_pi16(__m,__m1); } |
| |
| Is16vec4 operator<<(const Is16vec4 &__m1) { return (Is16vec4)_mm_sll_pi16(__m,__m1); } |
| Is16vec4 operator<<(int count) { return (Is16vec4)_mm_slli_pi16(__m,count); } |
| Is16vec4& operator<<=(const Is16vec4 &__m1) { return *this = (Is16vec4)_mm_sll_pi16(__m,__m1); } |
| Is16vec4& operator<<=(int count) { return *this = (Is16vec4)_mm_slli_pi16(__m,count); } |
| Is16vec4 operator>>(const Is16vec4 &__m1) { return (Is16vec4)_mm_sra_pi16(__m,__m1); } |
| Is16vec4 operator>>(int count) { return (Is16vec4)_mm_srai_pi16(__m,count); } |
| Is16vec4& operator>>=(const Is16vec4 &__m1) { return *this = (Is16vec4) _mm_sra_pi16(__m,__m1); } |
| Is16vec4& operator>>=(int count) { return *this = (Is16vec4) _mm_srai_pi16(__m,count); } |
| |
| }; |
| |
| class Iu16vec4 : public M64 |
| { |
| public: |
| Iu16vec4() { } |
| Iu16vec4(unsigned short ui0, unsigned short ui1, |
| unsigned short ui2, unsigned short ui3) {__m = _mm_set_pi16(ui0, ui1, ui2, ui3);} |
| explicit Iu16vec4(__m64 mm) : M64(mm) { } |
| explicit Iu16vec4(__int64 i) : M64 (i) { } |
| explicit Iu16vec4(int i) : M64 (i) { } |
| |
| Iu16vec4& operator= (const M64 &__m1) { return *this = (Iu16vec4) __m1; } |
| |
| Iu16vec4& operator&=(const Iu16vec4 &__m1) { return *this = (Iu16vec4) _mm_and_si64(__m,__m1); } |
| Iu16vec4& operator|=(const Iu16vec4 &__m1) { return *this = (Iu16vec4) _mm_or_si64(__m,__m1); } |
| Iu16vec4& operator^=(const Iu16vec4 &__m1) { return *this = (Iu16vec4) _mm_xor_si64(__m,__m1); } |
| |
| Iu16vec4& operator +=(const Iu16vec4 &__m1) { return *this = (Iu16vec4)_mm_add_pi16(__m,__m1); } |
| Iu16vec4& operator -=(const Iu16vec4 &__m1) { return *this = (Iu16vec4)_mm_sub_pi16(__m,__m1); } |
| Iu16vec4& operator *=(const Iu16vec4 &__m1) { return *this = (Iu16vec4)_mm_mullo_pi16(__m,__m1); } |
| |
| Iu16vec4 operator<<(const Iu16vec4 &__m1) { return (Iu16vec4)_mm_sll_pi16(__m,__m1); } |
| Iu16vec4 operator<<(int count) { return (Iu16vec4)_mm_slli_pi16(__m,count); } |
| Iu16vec4& operator<<=(const Iu16vec4 &__m1) { return *this = (Iu16vec4)_mm_sll_pi16(__m,__m1); } |
| Iu16vec4& operator<<=(int count) { return *this = (Iu16vec4)_mm_slli_pi16(__m,count); } |
| Iu16vec4 operator>>(const Iu16vec4 &__m1) { return (Iu16vec4)_mm_srl_pi16(__m,__m1); } |
| Iu16vec4 operator>>(int count) { return (Iu16vec4)_mm_srli_pi16(__m,count); } |
| Iu16vec4& operator>>=(const Iu16vec4 &__m1) { return *this = (Iu16vec4) _mm_srl_pi16(__m,__m1); } |
| Iu16vec4& operator>>=(int count) { return *this = (Iu16vec4) _mm_srli_pi16(__m,count); } |
| |
| }; |
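| /* |
| * Sketch for the 16-bit types (illustrative only): arithmetic is element-wise |
| * and operator*= keeps only the low 16 bits of each product. |
| * |
| *   Is16vec4 a(1, 2, 3, 4); |
| *   Is16vec4 b(10, 10, 10, 10); |
| *   a += b;                    // per-element add: 11, 12, 13, 14 |
| *   a *= b;                    // per-element low multiply: 110, 120, 130, 140 |
| */ |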
| |
| /* |
| * 8 elements, each 8 bits wide |
| */ |
| |
| class I8vec8 : public M64 |
| { |
| public: |
| I8vec8() { } |
| I8vec8(char s0, char s1, char s2, char s3, char s4, char s5, char s6, char s7) |
| { |
| __m = _mm_set_pi8(s0, s1, s2, s3, s4, s5, s6, s7); |
| } |
| explicit I8vec8(__m64 mm) : M64(mm) { } |
| explicit I8vec8(__int64 i) : M64 (i) { } |
| explicit I8vec8(int i) : M64 (i) { } |
| |
| I8vec8& operator= (const M64 &__m1) { return *this = (I8vec8) __m1; } |
| |
| I8vec8& operator&=(const I8vec8 &__m1) { return *this = (I8vec8) _mm_and_si64(__m,__m1); } |
| I8vec8& operator|=(const I8vec8 &__m1) { return *this = (I8vec8) _mm_or_si64(__m,__m1); } |
| I8vec8& operator^=(const I8vec8 &__m1) { return *this = (I8vec8) _mm_xor_si64(__m,__m1); } |
| |
| I8vec8& operator +=(const I8vec8 &__m1) { return *this = (I8vec8) _mm_add_pi8(__m,__m1); } |
| I8vec8& operator -=(const I8vec8 &__m1) { return *this = (I8vec8) _mm_sub_pi8(__m,__m1); } |
| |
| }; |
| |
| class Is8vec8 : public M64 |
| { |
| public: |
| Is8vec8() { } |
| Is8vec8(signed char s0, signed char s1, signed char s2, signed char s3, |
| signed char s4, signed char s5, signed char s6, signed char s7) |
| { |
| __m = _mm_set_pi8(s0, s1, s2, s3, s4, s5, s6, s7); |
| } |
| explicit Is8vec8(__m64 mm) : M64(mm) { } |
| explicit Is8vec8(__int64 i) : M64 (i) { } |
| explicit Is8vec8(int i) : M64 (i) { } |
| |
| Is8vec8& operator= (const M64 &__m1) { return *this = (Is8vec8) __m1; } |
| |
| Is8vec8& operator&=(const Is8vec8 &__m1) { return *this = (Is8vec8) _mm_and_si64(__m,__m1); } |
| Is8vec8& operator|=(const Is8vec8 &__m1) { return *this = (Is8vec8) _mm_or_si64(__m,__m1); } |
| Is8vec8& operator^=(const Is8vec8 &__m1) { return *this = (Is8vec8) _mm_xor_si64(__m,__m1); } |
| |
| Is8vec8& operator +=(const Is8vec8 &__m1) { return *this = (Is8vec8) _mm_add_pi8(__m,__m1); } |
| Is8vec8& operator -=(const Is8vec8 &__m1) { return *this = (Is8vec8) _mm_sub_pi8(__m,__m1); } |
| |
| }; |
| |
| class Iu8vec8 : public M64 |
| { |
| public: |
| Iu8vec8() { } |
| Iu8vec8(unsigned char s0, unsigned char s1, unsigned char s2, |
| unsigned char s3, unsigned char s4, unsigned char s5, |
| unsigned char s6, unsigned char s7) |
| { |
| __m = _mm_set_pi8(s0, s1, s2, s3, s4, s5, s6, s7); |
| } |
| explicit Iu8vec8(__m64 mm) : M64(mm) { } |
| explicit Iu8vec8(__int64 i) : M64 (i) { } |
| explicit Iu8vec8(int i) : M64 (i) { } |
| |
| Iu8vec8& operator= (const M64 &__m1) { return *this = (Iu8vec8) __m1; } |
| |
| Iu8vec8& operator&=(const Iu8vec8 &__m1) { return *this = (Iu8vec8) _mm_and_si64(__m,__m1); } |
| Iu8vec8& operator|=(const Iu8vec8 &__m1) { return *this = (Iu8vec8) _mm_or_si64(__m,__m1); } |
| Iu8vec8& operator^=(const Iu8vec8 &__m1) { return *this = (Iu8vec8) _mm_xor_si64(__m,__m1); } |
| |
| Iu8vec8& operator +=(const Iu8vec8 &__m1) { return *this = (Iu8vec8) _mm_add_pi8(__m,__m1); } |
| Iu8vec8& operator -=(const Iu8vec8 &__m1) { return *this = (Iu8vec8) _mm_sub_pi8(__m,__m1); } |
| |
| }; |
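| /* |
| * Sketch for the 8-bit types (illustrative only): operator+= and operator-= |
| * wrap per element; saturating forms are the free functions sat_add/sat_sub |
| * defined further below. |
| * |
| *   Iu8vec8 a(250, 250, 250, 250, 250, 250, 250, 250); |
| *   Iu8vec8 b(10, 10, 10, 10, 10, 10, 10, 10); |
| *   a += b;                    // wraps modulo 256: every element becomes 4 |
| */ |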
| |
| |
| |
| /********************************* Logicals ****************************************/ |
| #define MMCLASS_LOGICALS(vect,element) \ |
| inline I##vect##vec##element operator& (const I##vect##vec##element &a, const I##vect##vec##element &b) \ |
| { return (I##vect##vec##element) _mm_and_si64( a,b); } \ |
| inline I##vect##vec##element operator| (const I##vect##vec##element &a, const I##vect##vec##element &b) \ |
| { return (I##vect##vec##element) _mm_or_si64( a,b); } \ |
| inline I##vect##vec##element operator^ (const I##vect##vec##element &a, const I##vect##vec##element &b) \ |
| { return (I##vect##vec##element) _mm_xor_si64( a,b); } \ |
| inline I##vect##vec##element andnot (const I##vect##vec##element &a, const I##vect##vec##element &b) \ |
| { return (I##vect##vec##element) _mm_andnot_si64( a,b); } |
| |
| MMCLASS_LOGICALS(8,8) |
| MMCLASS_LOGICALS(u8,8) |
| MMCLASS_LOGICALS(s8,8) |
| MMCLASS_LOGICALS(16,4) |
| MMCLASS_LOGICALS(u16,4) |
| MMCLASS_LOGICALS(s16,4) |
| MMCLASS_LOGICALS(32,2) |
| MMCLASS_LOGICALS(u32,2) |
| MMCLASS_LOGICALS(s32,2) |
| MMCLASS_LOGICALS(64,1) |
| #undef MMCLASS_LOGICALS |
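| /* |
| * The macro above generates the free bitwise operators for every vector |
| * class.  Sketch (illustrative only): |
| * |
| *   Iu32vec2 a(0xF0u, 0x0Fu), mask(0xFFu, 0xFFu); |
| *   Iu32vec2 x = a & mask;         // per-bit AND |
| *   Iu32vec2 y = andnot(a, mask);  // (~a) & mask, per the _mm_andnot_si64 convention |
| */ |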
| |
| |
| /********************************* Add & Sub ****************************************/ |
| #define MMCLASS_ADD_SUB(vect,element,opsize) \ |
| inline I##vect##vec##element operator+ (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element) _mm_add_pi##opsize( __m1,__m2); } \ |
| inline I##vect##vec##element operator- (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element) _mm_sub_pi##opsize( __m1,__m2); } |
| MMCLASS_ADD_SUB(8,8, 8) |
| MMCLASS_ADD_SUB(u8,8,8) |
| MMCLASS_ADD_SUB(s8,8,8) |
| MMCLASS_ADD_SUB(16,4,16) |
| MMCLASS_ADD_SUB(u16,4,16) |
| MMCLASS_ADD_SUB(s16,4,16) |
| MMCLASS_ADD_SUB(32,2, 32) |
| MMCLASS_ADD_SUB(u32,2,32) |
| MMCLASS_ADD_SUB(s32,2,32) |
| |
| #undef MMCLASS_ADD_SUB |
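| /* |
| * Wrapping element-wise addition and subtraction for every element width |
| * except the single 64-bit element.  Sketch (illustrative only): |
| * |
| *   I16vec4 a(1, 2, 3, 4), b(4, 3, 2, 1); |
| *   I16vec4 sum  = a + b;      // 5, 5, 5, 5 |
| *   I16vec4 diff = a - b;      // -3, -1, 1, 3 |
| */ |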
| |
| /********************************* SAT Add & Sub ****************************************/ |
| #define MMCLASS_SAT_ADD_SUB(vect,element,opsize) \ |
| inline I##vect##vec##element sat_add(const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_adds_p##opsize( __m1,__m2); }\ |
| inline I##vect##vec##element sat_sub(const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_subs_p##opsize( __m1,__m2); } |
| |
| MMCLASS_SAT_ADD_SUB(u8,8,u8) |
| MMCLASS_SAT_ADD_SUB(s8,8,i8) |
| MMCLASS_SAT_ADD_SUB(u16,4,u16) |
| MMCLASS_SAT_ADD_SUB(s16,4,i16) |
| MMCLASS_SAT_ADD_SUB(u32,2,u32) |
| MMCLASS_SAT_ADD_SUB(s32,2,i32) |
| |
| #undef MMCLASS_SAT_ADD_SUB |
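| /* |
| * Saturating add/sub clamp each result to the element's representable range |
| * instead of wrapping.  Sketch (illustrative only): |
| * |
| *   Iu8vec8 a(250, 250, 250, 250, 250, 250, 250, 250); |
| *   Iu8vec8 b(10, 10, 10, 10, 10, 10, 10, 10); |
| *   Iu8vec8 c = sat_add(a, b); // every element clamps to 255 |
| */ |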
| |
| /********************************* Comparisons ****************************************/ |
| #define MMCLASS_EQ(vect,element,opsize) \ |
| inline I##vect##vec##element operator== (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_cmpeq_pi##opsize( __m1,__m2); }\ |
| inline I##vect##vec##element operator!= (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_andnot_si64(_mm_cmpeq_pi##opsize(__m1,__m2), _wmmx_all_ones); } |
| |
| MMCLASS_EQ(8,8, 8) |
| MMCLASS_EQ(u8,8,8) |
| MMCLASS_EQ(s8,8,8) |
| MMCLASS_EQ(16,4,16) |
| MMCLASS_EQ(u16,4,16) |
| MMCLASS_EQ(s16,4,16) |
| MMCLASS_EQ(32,2, 32) |
| MMCLASS_EQ(u32,2,32) |
| MMCLASS_EQ(s32,2,32) |
| |
| #undef MMCLASS_EQ |
| |
| #define MMCLASS_GT(vect,element,opsize) \ |
| inline I##vect##vec##element operator> (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_cmpgt_p##opsize( __m1,__m2); } \ |
| inline I##vect##vec##element operator< (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_cmpgt_p##opsize( __m2,__m1); } \ |
| inline I##vect##vec##element operator>= (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_andnot_si64(_mm_cmpgt_p##opsize( __m2,__m1), _wmmx_all_ones); } \ |
| inline I##vect##vec##element operator<= (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_andnot_si64(_mm_cmpgt_p##opsize( __m1,__m2), _wmmx_all_ones); } |
| |
| |
| MMCLASS_GT(u8,8,u8) |
| MMCLASS_GT(s8,8,i8) |
| MMCLASS_GT(u16,4,u16) |
| MMCLASS_GT(s16,4,i16) |
| MMCLASS_GT(u32,2,u32) |
| MMCLASS_GT(s32,2,i32) |
| |
| #undef MMCLASS_GT |
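| /* |
| * Comparisons return element-wise masks rather than a single bool: an element |
| * is all ones where the relation holds and all zeros where it does not. |
| * Sketch (illustrative only, results listed in constructor-argument order): |
| * |
| *   Is16vec4 a(1, 5, 3, 7), b(2, 2, 2, 2); |
| *   Is16vec4 gt = a > b;       // 0x0000, 0xFFFF, 0xFFFF, 0xFFFF |
| *   Is16vec4 eq = a == b;      // all elements zero |
| */ |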
| |
| /********************************* Unpack ****************************************/ |
| #define MMCLASS_UNPACK(vect,element,opsize) \ |
| inline I##vect##vec##element unpack_low (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_unpacklo_pi##opsize( __m1,__m2); } \ |
| inline I##vect##vec##element unpack_high (const I##vect##vec##element &__m1, const I##vect##vec##element &__m2) \ |
| { return (I##vect##vec##element)_mm_unpackhi_pi##opsize( __m1,__m2); } |
| |
| MMCLASS_UNPACK(8,8, 8) |
| MMCLASS_UNPACK(u8,8,8) |
| MMCLASS_UNPACK(s8,8,8) |
| MMCLASS_UNPACK(16,4,16) |
| MMCLASS_UNPACK(u16,4,16) |
| MMCLASS_UNPACK(s16,4,16) |
| MMCLASS_UNPACK(32,2, 32) |
| MMCLASS_UNPACK(u32,2,32) |
| MMCLASS_UNPACK(s32,2,32) |
| |
| #undef MMCLASS_UNPACK |
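| /* |
| * unpack_low/unpack_high interleave the elements from the lower or upper half |
| * of the two operands; interleaving with a zero vector is a common way to |
| * widen elements (operand order follows the underlying _mm_unpacklo/_mm_unpackhi |
| * convention).  Sketch (illustrative only): |
| * |
| *   Iu8vec8  bytes(1, 2, 3, 4, 5, 6, 7, 8); |
| *   Iu8vec8  zero(0, 0, 0, 0, 0, 0, 0, 0); |
| *   Iu16vec4 widened((__m64)unpack_low(bytes, zero)); // each 16-bit lane pairs one byte with a zero byte |
| */ |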
| |
| /********************************* Multiplication ****************************************/ |
| |
| #define MMCLASS_MULLO(vect,element,opsize) \ |
| inline I##vect##vec##element operator*(const I##vect##vec##element &__m1, const I##vect##vec##element &__m2)\ |
| { return (I##vect##vec##element)_mm_mullo_pi##opsize(__m1,__m2); } |
| |
| MMCLASS_MULLO(u16,4,16) |
| MMCLASS_MULLO(s16,4,16) |
| MMCLASS_MULLO(16,4,16) |
| MMCLASS_MULLO(u32,2,32) |
| MMCLASS_MULLO(s32,2,32) |
| MMCLASS_MULLO(32,2,32) |
| |
| #undef MMCLASS_MULLO |
| |
| #define MMCLASS_MULHI(vect,element,opsize) \ |
| inline I##vect##vec##element mul_high(const I##vect##vec##element &__m1, const I##vect##vec##element &__m2)\ |
| { return (I##vect##vec##element)_mm_mulhi_p##opsize(__m1,__m2); } |
| |
| MMCLASS_MULHI(s16,4,i16) |
| MMCLASS_MULHI(u16,4,u16) |
| MMCLASS_MULHI(s32,2,i32) |
| MMCLASS_MULHI(u32,2,u32) |
| |
| #undef MMCLASS_MULHI |
| |
| inline Is32vec2 mul_add(const Is16vec4 &__m1, const Is16vec4 &__m2) { return (Is32vec2)_mm_madd_pi16(__m1,__m2);} |
| inline Iu32vec2 mul_add(const Iu16vec4 &__m1, const Iu16vec4 &__m2) { return (Iu32vec2)_mm_madd_pu16(__m1,__m2);} |
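| /* |
| * operator* keeps the low half of each product, mul_high returns the high |
| * half, and mul_add multiplies corresponding 16-bit elements and sums |
| * adjacent pairs into 32-bit results.  Sketch (illustrative only): |
| * |
| *   Is16vec4 a(1000, 1000, 1000, 1000), b(40, 40, 40, 40); |
| *   Is16vec4 lo = a * b;          // low 16 bits of 40000 wrap to -25536 |
| *   Is16vec4 hi = mul_high(a, b); // high 16 bits of 40000: 0 |
| *   Is32vec2 dot = mul_add(a, b); // pairwise sums of the 32-bit products |
| */ |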
| |
| /********************************* Pack ****************************************/ |
| |
| inline Is16vec4 pack_sat(const Is32vec2 &__m1, const Is32vec2 &__m2) { return (Is16vec4)_mm_packs_pi32(__m1,__m2); } |
| inline Is8vec8 pack_sat(const Is16vec4 &__m1, const Is16vec4 &__m2) { return (Is8vec8)_mm_packs_pi16(__m1,__m2); } |
| |
| inline Iu16vec4 packu_sat(const Iu32vec2 &__m1, const Iu32vec2 &__m2) { return (Iu16vec4)_mm_packs_pu32(__m1,__m2); } |
| inline Iu8vec8 packu_sat(const Iu16vec4 &__m1, const Iu16vec4 &__m2) { return (Iu8vec8)_mm_packs_pu16(__m1,__m2); } |
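| /* |
| * The pack helpers narrow each element to the next smaller width with |
| * saturation; pack_sat keeps the signed range, packu_sat clamps to the |
| * unsigned range.  Sketch (illustrative only): |
| * |
| *   Is32vec2 wide(100000, -100000); |
| *   Is16vec4 narrow = pack_sat(wide, wide); // elements clamp to 32767 and -32768 |
| */ |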
| |
| #endif /* MMCLASS_H_INCLUDED */ |