SIMD.h
Go to the documentation of this file.00001 /**<!--------------------------------------------------------------------> 00002 @file SIMD.h 00003 @author Travis Fischer (fisch0920@gmail.com) 00004 @date Fall 2008 00005 00006 @brief 00007 Contains useful/common definitions for working with SSE intrinsics 00008 <!-------------------------------------------------------------------->**/ 00009 00010 #ifndef SIMD_H_ 00011 #define SIMD_H_ 00012 00013 #include <common/common.h> 00014 00015 #if MILTON_ENABLE_SSE 00016 00017 #include <xmmintrin.h> // SSE1 00018 #include <emmintrin.h> // SSE2 00019 #include <pmmintrin.h> // SSE3 00020 00021 typedef __m64 m64_t; 00022 typedef __m128 m128f_t; 00023 typedef __m128i m128i_t; 00024 typedef __m128d m128d_t; 00025 00026 // alignment utilities 00027 #define SSE_ALIGN16_PRE ALIGN_PRE(16) 00028 #define SSE_ALIGN16_POST ALIGN_POST(16) 00029 00030 #define DECLARE_ALIGNED_MEMORY_OPERATORS \ 00031 static inline void *operator new(size_t s) { \ 00032 return malloc_aligned((unsigned)s); \ 00033 } \ 00034 static inline void *operator new[](size_t s) { \ 00035 return malloc_aligned((unsigned)s); \ 00036 } \ 00037 static inline void operator delete(void* ptr) { \ 00038 free_aligned(ptr); \ 00039 } \ 00040 static inline void operator delete[](void* ptr) { \ 00041 free_aligned(ptr); \ 00042 } 00043 00044 #include <memory> 00045 00046 extern void *operator new (size_t size) throw (std::bad_alloc); 00047 extern void *operator new[](size_t size) throw (std::bad_alloc); 00048 00049 #ifdef __cplusplus 00050 extern "C" { // turn off name mangling 00051 #endif 00052 00053 /** 00054 * @returns a pointer to a block of memory allocated with malloc which 00055 * is aligned on a 16-byte boundary 00056 * @note returned memory should be freed with free_aligned 00057 * 00058 * @see also _mm_malloc 00059 */ 00060 extern void *malloc_aligned(unsigned n); 00061 00062 /** 00063 * @brief 00064 * frees the memory at the address given which is assumed to have 00065 * 
been previously allocated with malloc_aligned 00066 * 00067 * @see also _mm_free 00068 */ 00069 extern void free_aligned(void *ptr); 00070 00071 #ifdef __cplusplus 00072 } // end of extern "C" 00073 #endif 00074 00075 00076 /** 00077 * @brief 00078 * 128-bit SSE (Streaming SIMD Extension) registers require 16-byte 00079 * alignment which necessitates special care when allocating objects containing 00080 * SSE data types both on the stack and on the heap (via the new operator) 00081 * 00082 * @note 00083 * SSE has been <b>enabled</b> in this build of Milton 00084 */ 00085 struct SSE_ALIGN16_PRE SSEAligned { 00086 DECLARE_ALIGNED_MEMORY_OPERATORS 00087 } SSE_ALIGN16_POST; 00088 00089 struct SSE_ALIGN16_PRE SimpleSSEVector : public SSEAligned { 00090 union { 00091 real_t data[4]; 00092 m128f_t vec; 00093 00094 struct { real_t x, y, z, w; }; 00095 }; 00096 00097 inline SimpleSSEVector(const m128f_t &v) 00098 : vec(v) 00099 { } 00100 } SSE_ALIGN16_POST; 00101 00102 00103 #define _mm_extract_epi32(x, imm) \ 00104 ((real_t)_mm_cvtsi128_si32(_mm_srli_si128((x), 4 * (imm)))) 00105 00106 #define _mm_extract_f32i(vec, index) \ 00107 (SimpleSSEVector((vec)).data[(index)]) 00108 00109 #define _mm_extract_f32(vec) \ 00110 (SimpleSSEVector((vec)).data[0]) 00111 00112 00113 #ifdef __cplusplus 00114 extern "C" { // turn off name mangling 00115 #endif 00116 00117 /** 00118 * @returns fuzzy element-wise equality between @p a and @p b 00119 */ 00120 static inline m128f_t SSE_EQ(const m128f_t &a, const m128f_t &b) { 00121 // could use the _mm_cmpeq_ps func here, but it has the same problems as 00122 // comparing a float; so, see if a is roughly close to b... 
00123 const m128f_t &a_plus_a_bit = _mm_add_ps(a, _mm_set_ps1(EPSILON)); 00124 const m128f_t &a_minus_a_bit = _mm_sub_ps(a, _mm_set_ps1(EPSILON)); 00125 00126 const m128f_t &b_lt_a = _mm_cmplt_ps(b, a_plus_a_bit); 00127 const m128f_t &b_gt_a = _mm_cmpgt_ps(b, a_minus_a_bit); 00128 00129 return _mm_and_ps(b_lt_a, b_gt_a); 00130 } 00131 00132 /** 00133 * @returns true iff @a and @b are approximately equal 00134 */ 00135 static inline bool SSE_EQb(const m128f_t &a, const m128f_t &b) { 00136 const SimpleSSEVector v(_mm_cmpneq_ps(SSE_EQ(a, b), _mm_setzero_ps())); 00137 00138 return (v.data[0] != 0 && v.data[1] != 0 && v.data[2] != 0 && v.data[3] != 0); 00139 } 00140 00141 /** 00142 * @returns fuzzy element-wise inequality between @p a and @p b 00143 */ 00144 static inline m128f_t SSE_NEQ(const m128f_t &a, const m128f_t &b) { 00145 // could use the _mm_cmpeq_ps func here, but it has the same problems as 00146 // comparing a float; so, see if a is roughly close to b... 00147 const m128f_t &a_plus_a_bit = _mm_add_ps(a, _mm_set_ps1(EPSILON)); 00148 const m128f_t &a_minus_a_bit = _mm_sub_ps(a, _mm_set_ps1(EPSILON)); 00149 00150 const m128f_t &b_gt_a = _mm_cmpgt_ps(b, a_plus_a_bit); 00151 const m128f_t &b_lt_a = _mm_cmplt_ps(b, a_minus_a_bit); 00152 00153 return _mm_or_ps(b_lt_a, b_gt_a); 00154 } 00155 00156 /** 00157 * @returns true iff @a and @b are not approximately equal 00158 */ 00159 static inline bool SSE_NEQb(const m128f_t &a, const m128f_t &b) { 00160 const SimpleSSEVector v(_mm_cmpneq_ps(SSE_EQ(a, b), _mm_setzero_ps())); 00161 00162 return (v.data[0] == 0 || v.data[1] == 0 || v.data[2] == 0 || v.data[3] == 0); 00163 } 00164 00165 #ifdef __cplusplus 00166 } // end of extern "C" 00167 #endif 00168 00169 #else // MILTON_ENABLE_SSE 00170 00171 /** 00172 * @brief 00173 * 128-bit SSE (Streaming SIMD Extension) registers require 16-byte 00174 * alignment which necessitates special care when allocating objects containing 00175 * SSE data types both on the stack and on the 
heap (via the new operator) 00176 * 00177 * @note 00178 * SSE has been <b>disabled</b> in this build of Milton, resulting in 00179 * SSEAligned being just a dummy, placeholder class with no real functionality 00180 * or effect 00181 */ 00182 struct SSEAligned { }; 00183 00184 #endif // MILTON_ENABLE_SSE 00185 00186 #endif // SIMD_H_ 00187
Generated on 28 Feb 2009 for Milton by Doxygen 1.5.6