/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log

   Inspired by Intel Approximate Math library, and based on the
   corresponding algorithms of the cephes math library

   The default is to use the SSE1 version. If you define USE_SSE2 the
   SSE2 intrinsics will be used in place of the MMX intrinsics. Do
   not expect any significant performance improvement with SSE2.
*/

/* Copyright (C) 2007  Julien Pommier

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  (this is the zlib license)
*/

#ifndef SSE_MATHFUN
#define SSE_MATHFUN

#include <stdint.h>    /* int8_t ... uint64_t, used for the v4sfu lane views */
#include <xmmintrin.h> /* __m128 */

/* yes I know, the top of this file is quite ugly */

/* ALIGN16_BEG / ALIGN16_END wrap a declaration so that it is 16-byte aligned:
   MSVC wants the specifier in front, gcc/icc want the attribute behind. */
#ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END
#else /* gcc or icc */
# define ALIGN16_BEG
# define ALIGN16_END __attribute__((aligned(16)))
#endif

/* __m128 is ugly to write */
//typedef __m128 _v4sfu;  // vector of 4 float (sse1)

/* USE_SSE2 is always defined past this point, so the MMX (__m64) branch
   below is never selected when this header is included. */
#ifndef USE_SSE2 //sry this is all sse2 now
#define USE_SSE2
#endif

#ifdef USE_SSE2
# include <emmintrin.h>
#else
typedef __m64 v2si;   // vector of 2 int (mmx)
#endif

// !!! Andrew Hallendorff Warning changed call structure to make compatible with gcc

/* 16-byte aligned union exposing several lane-wise views of the same
   16 bytes: four floats, or 16/8/4/2 signed or unsigned integers.
   Every member overlays the same storage. */
typedef ALIGN16_BEG union {
   float    m128_f32[4];
   int8_t   m128_i8[16];
   int16_t  m128_i16[8];
   int32_t  m128_i32[4];
   int64_t  m128_i64[2];
   uint8_t  m128_u8[16];
   uint16_t m128_u16[8];
   uint32_t m128_u32[4];
   uint64_t m128_u64[2];
} ALIGN16_END v4sfu;

/* The functions below are only declared here; their definitions are not part
   of this header. Arguments are passed by pointer to v4sfu (see the gcc
   compatibility warning above).
   NOTE(review): the descriptions are inferred from the names and the file
   description (sin, cos, exp and log on packed floats) - confirm against the
   implementation. */

/* Presumably the logarithm of the four float lanes at *xPtr. */
__m128 log_ps(v4sfu *xPtr);

/* Presumably the sine of the four float lanes at *xPtr. */
__m128 sin_ps(v4sfu *xPtr);

/* Presumably the sine and cosine of the four float lanes at *xptr, stored
   through sptr and cptr respectively. */
void sincos_ps(v4sfu *xptr, v4sfu *sptr, v4sfu *cptr);

#endif /* SSE_MATHFUN */