Vector3SSE.inl
Go to the documentation of this file.00001 /**<!--------------------------------------------------------------------> 00002 @file Vector3SSE.inl 00003 @author Travis Fischer (fisch0920@gmail.com) 00004 @date Fall 2008 00005 00006 @brief 00007 Specialized implementation of Vector3, accelerated to support efficient 00008 SIMD vectorization via SSE intrinsics 00009 00010 @note this file is not meant to be #included directly 00011 @see Vector.h for more details 00012 <!-------------------------------------------------------------------->**/ 00013 00014 #ifndef VECTOR3_SSE_INL_ 00015 #define VECTOR3_SSE_INL_ 00016 00017 00018 // Constructors 00019 // ------------ 00020 00021 // Expects N arguments of type T 00022 // (srcData is a pointer to an N-length array of type T) 00023 inline Vector<3, real_t>::Vector(const real_t *srcData) 00024 : vec(_mm_setr_ps(srcData[0], srcData[1], srcData[2], 0)) 00025 { } 00026 00027 // Constructs an zero Vector 00028 inline Vector<3, real_t>::Vector() 00029 : vec(_mm_setzero_ps()) 00030 { } 00031 00032 // Copy Constructor; copies the underlying data from v to this vector 00033 inline Vector<3, real_t>::Vector(const Vector<3, real_t> &v) 00034 : vec(v.vec) 00035 { } 00036 00037 // SSE copy constructor 00038 inline Vector<3, real_t>::Vector(const m128f_t &v) 00039 : vec(v) 00040 { } 00041 00042 00043 // Accessor Operators 00044 // ------------------ 00045 00046 // @returns a const reference to the element at the given index 00047 inline const real_t &Vector<3, real_t>::operator[](const unsigned index) const { 00048 ASSERT(index < 3); 00049 00050 return data[index]; 00051 } 00052 00053 // @returns a reference to the element at the given index 00054 // @note changes to the returned element will affect this vector 00055 inline real_t &Vector<3, real_t>::operator[](const unsigned index) { 00056 ASSERT(index < 3); 00057 00058 return data[index]; 00059 } 00060 00061 // @returns a pointer to the underlying data (N-length array of type T) 00062 inline const real_t *Vector<3, real_t>::operator* () const { 00063 return data; 00064 } 00065 00066 // @returns a pointer to the underlying data (N-length array of type T) 00067 inline real_t *Vector<3, real_t>::operator* () { 00068 return data; 00069 } 00070 00071 00072 // Equality Operators 00073 // ------------------ 00074 00075 inline bool Vector<3, real_t>::operator==( 00076 const Vector<3, real_t> &v) const 00077 { 00078 return SSE_EQb(vec, v.vec); 00079 } 00080 00081 00082 inline bool Vector<3, real_t>::operator!=( 00083 const Vector<3, real_t> &v) const 00084 { 00085 return !(*this == v); 00086 } 00087 00088 // Mutator Operators 00089 // ----------------- 00090 00091 inline Vector<3, real_t> &Vector<3, real_t>::operator =( 00092 const Vector<3, real_t> &v) 00093 { 00094 vec = v.vec; 00095 00096 return *this; 00097 } 00098 00099 inline Vector<3, real_t> &Vector<3, real_t>::operator+=( 00100 const Vector<3, real_t> &rhs) 00101 { 00102 vec = _mm_add_ps(vec, rhs.vec); 00103 00104 return *this; 00105 } 00106 00107 inline Vector<3, real_t> &Vector<3, real_t>::operator-=( 00108 const Vector<3, real_t> &rhs) 00109 { 00110 vec = _mm_sub_ps(vec, rhs.vec); 00111 00112 return *this; 00113 } 00114 00115 00116 // Scalar Mutator Operators 00117 // ------------------------ 00118 00119 inline Vector<3, real_t> &Vector<3, real_t>::operator*=( 00120 const real_t &scale) 00121 { 00122 vec = _mm_mul_ps(vec, _mm_set_ps1(scale)); 00123 00124 return *this; 00125 } 00126 00127 inline Vector<3, real_t> &Vector<3, real_t>::operator/=( 00128 const real_t &scale) 00129 { 00130 if (scale != 0) { 00131 vec = _mm_div_ps(vec, _mm_set_ps1(scale)); 00132 } else { 00133 std::cerr << "Error: Attempting to divide Vector by zero" << std::endl << std::endl; 00134 ASSERT(0); 00135 } 00136 00137 return *this; 00138 } 00139 00140 00141 // Arithmetic Operators 00142 // -------------------- 00143 00144 inline Vector<3, real_t> Vector<3, real_t>::operator+ ( 00145 const Vector<3, real_t> &rhs) const 00146 { 00147 return Vector<3, real_t>(_mm_add_ps(vec, rhs.vec)); 00148 } 00149 00150 inline Vector<3, real_t> Vector<3, real_t>::operator- ( 00151 const Vector<3, real_t> &rhs) const 00152 { 00153 return Vector<3, real_t>(_mm_sub_ps(vec, rhs.vec)); 00154 } 00155 00156 inline Vector<3, real_t> Vector<3, real_t>::operator* ( 00157 const Vector<3, real_t> &rhs) const 00158 { 00159 return Vector<3, real_t>(_mm_mul_ps(vec, rhs.vec)); 00160 } 00161 00162 // Scalar Arithmetic Operators 00163 inline Vector<3, real_t> Vector<3, real_t>::operator* ( 00164 const real_t &scale) const 00165 { 00166 return Vector<3, real_t>(_mm_mul_ps(vec, _mm_set_ps1(scale))); 00167 } 00168 00169 inline Vector<3, real_t> Vector<3, real_t>::operator/ ( 00170 const real_t &scale) const 00171 { 00172 if (scale != 0) { 00173 return Vector<3, real_t>(_mm_div_ps(vec, _mm_set_ps1(scale))); 00174 } else { 00175 std::cerr << "Error: Attempting to divide Vector by zero" << std::endl << std::endl; 00176 ASSERT(0); 00177 00178 return Vector<3, real_t>(); 00179 } 00180 } 00181 00182 00183 // More Complex Functionality 00184 // -------------------------- 00185 00186 // @returns whether or not this Vector is unitized 00187 inline bool Vector<3, real_t>::isUnit() const { 00188 return (EQ(getMagnitude2(), 1)); 00189 } 00190 00191 // @returns whether or not this Vector is the zero vector 00192 inline bool Vector<3, real_t>::isZero() const { 00193 return (_mm_extract_f32(_mm_cmpeq_ps(vec, _mm_setzero_ps())) != 0); 00194 } 00195 00196 // @returns a normalized version of this vector 00197 inline Vector<3, real_t> Vector<3, real_t>::getNormalized() const { 00198 const real_t &magnitude = getMagnitude(); 00199 00200 if (magnitude == 0) { 00201 //std::cerr << "Error: Attempting to normalize vector with magnitude zero" 00202 //<< std::endl << std::endl; 00203 //ASSERT(0); 00204 00205 // TODO: how to handle this; apparently lots of meshes contain 00206 // zero-ish data which gets truncated to zero and we get here.. 00207 00208 return Vector<3, real_t>(); 00209 } else { 00210 return Vector<3, real_t>(_mm_div_ps(vec, _mm_set_ps1(magnitude))); 00211 } 00212 } 00213 00214 // @returns the reciprocal version of this vector (1.0f / this) 00215 inline Vector<3, real_t> Vector<3, real_t>::getReciprocal() const { 00216 return Vector<3, real_t>(_mm_rcp_ps(vec)); 00217 } 00218 00219 // Normalizes this Vector and returns the old magnitude 00220 inline real_t Vector<3, real_t>::normalize() { 00221 const real_t &magnitude = getMagnitude(); 00222 00223 if (magnitude != 0) { 00224 vec = _mm_div_ps(vec, _mm_set_ps1(magnitude)); 00225 00226 #ifdef DEBUG 00227 const Vector3 &v = *this; 00228 const real_t s = v[0] * v[0] + v[1] * v[1] + v[2] * v[2]; 00229 ASSERT(EQ(sqrt(s), 1)); 00230 #endif 00231 } else { 00232 //std::cerr << "Error: Attempting to normalize vector with magnitude zero" 00233 //<< std::endl << std::endl; 00234 //ASSERT(0); 00235 00236 // TODO: how to handle this; apparently lots of meshes contain 00237 // zero-ish data which gets truncated to zero and we get here.. 00238 } 00239 00240 return magnitude; 00241 } 00242 00243 // available in SSE4 as a builtin 00244 // returns the dot (inner) product between the two vectors given 00245 static inline real_t s_dot(const m128f_t &v1, const m128f_t &v2) { 00246 const SimpleSSEVector v(_mm_mul_ps(v1, v2)); 00247 00248 return (v.x + v.y + v.z); 00249 /*const m128f_t &l = _mm_movelh_ps(v, v); 00250 const m128f_t &h = _mm_movehl_ps(v, v); 00251 00252 return _mm_add_ps(l, h);*/ 00253 } 00254 00255 // @returns the magnitude of this vector 00256 inline real_t Vector<3, real_t>::getMagnitude() const { 00257 const real_t d = sqrt(getMagnitude2()); 00258 00259 #if 0 // debugging; alternate way of computing magnitude 00260 const real_t d1 = _mm_extract_f32(_mm_sqrt_ss(_mm_dot_ps(vec, vec))); 00261 ASSERT(EQ(d, d1)); 00262 #endif 00263 00264 return d; 00265 } 00266 00267 // @returns the squared magnitude of this vector 00268 inline real_t Vector<3, real_t>::getMagnitude2() const { 00269 // squared magnitude is same as dot product of a vector with itself 00270 return dot(*this); 00271 } 00272 00273 // @returns the magnitude of the vector connecting this vector to the one 00274 // passed in 00275 inline real_t Vector<3, real_t>::getDistance(const Vector<3, real_t> &v) const 00276 { 00277 return sqrt(getDistance2(v)); 00278 } 00279 00280 // @returns the squared magnitude of the vector connecting this vector to 00281 // the one passed in 00282 inline real_t Vector<3, real_t>::getDistance2(const Vector<3, real_t> &v) const 00283 { 00284 const m128f_t &v1 = _mm_sub_ps(v, vec); 00285 00286 return s_dot(v1, v1); 00287 } 00288 00289 // @returns the sum of the components in this Vector 00290 inline real_t Vector<3, real_t>::getSum() const { 00291 return data[0] + data[1] + data[2]; 00292 } 00293 00294 // @returns the average of the components in this Vector 00295 inline real_t Vector<3, real_t>::getAverage() const { 00296 return (getSum() / 3); 00297 } 00298 00299 // @returns the dot (inner) product of two vectors 00300 inline real_t Vector<3, real_t>::dot(const Vector<3, real_t> &rhs) const { 00301 return s_dot(vec, rhs.vec); 00302 } 00303 00304 00305 // Specialized Vector Functionality 00306 // -------------------------------- 00307 00308 // Convenience Constructor 00309 inline Vector<3, real_t>::Vector(const real_t &v0, const real_t &v1, 00310 const real_t &v2) 00311 : vec(_mm_setr_ps(v0, v1, v2, 0)) 00312 { } 00313 00314 // @returns the cross product of two vectors 00315 inline Vector<3, real_t> Vector<3, real_t>::cross( 00316 const Vector<3, real_t> &v) const 00317 { 00318 const Vector<3, real_t> &u = *this; 00319 00320 return Vector<3, real_t>(u[1] * v[2] - u[2] * v[1], 00321 u[2] * v[0] - u[0] * v[2], 00322 u[0] * v[1] - u[1] * v[0]); 00323 } 00324 00325 /*void crossp_sse(float v1[4], float v2[4], float out[4]) { 00326 __m128 vector1, vector2, vector3, vector4, vector5; 00327 00328 vector1 = _mm_load_ps(v1); 00329 vector2 = _mm_load_ps(v2); 00330 00331 vector3 = _mm_shuffle_ps(vector2, vector1, _MM_SHUFFLE(3, 0, 2, 2)); 00332 vector4 = _mm_shuffle_ps(vector1, vector2, _MM_SHUFFLE(3, 1, 0, 1)); 00333 00334 vector5 = _mm_mul_ps(vector3, vector4); 00335 00336 vector3 = _mm_shuffle_ps(vector1, vector2, _MM_SHUFFLE(3, 0, 2, 2)); 00337 vector4 = _mm_shuffle_ps(vector2, vector1, _MM_SHUFFLE(3, 1, 0, 1)); 00338 00339 vector3 = _mm_mul_ps(vector3, vector4); 00340 vector3 = _mm_sub_ps(vector5, vector3); 00341 00342 _mm_store_ps(out, vector3); 00343 00344 out[1] *= -1; 00345 }*/ 00346 00347 // Extra operators where Vector is on right-hand side 00348 // -------------------------------------------------- 00349 00350 // @returns the N-length vector resulting from multiplying a scalar by an 00351 // N-length vector 00352 inline Vector<3, real_t> operator* (const real_t &scale, 00353 const Vector<3, real_t> &rhs) 00354 { 00355 return rhs * scale; 00356 } 00357 00358 // @returns (-1) * rhs, which is a negated version of the original right 00359 // hand side vector 00360 inline Vector<3, real_t> operator- (const Vector<3, real_t> &rhs) { 00361 return Vector<3, real_t>(_mm_sub_ps(_mm_setzero_ps(), rhs.vec)); 00362 } 00363 00364 // Prints a Vector to an output stream 00365 inline std::ostream &operator<<(std::ostream &os, 00366 const Vector<3, real_t> &v) 00367 { 00368 os << "[ "; 00369 00370 for(unsigned i = 0; i < 3; ++i) 00371 os << v.data[i] << (i < 3 - 1 ? ", " : ""); 00372 00373 os << " ]"; 00374 return os; 00375 } 00376 00377 // @returns dimension (0,1,...,N) of maximum length 00378 inline unsigned Vector<3, real_t>::getMaxDimension() const { 00379 unsigned max = 0; 00380 real_t maxVal = 0; 00381 00382 for(unsigned i = 3 - 1; i--;) { 00383 const real_t &val = ABS(data[i]); 00384 00385 if (val > maxVal) { 00386 max = i; 00387 maxVal = val; 00388 } 00389 } 00390 00391 return max; 00392 } 00393 00394 // @returns dimension (0,1,...,N) of minimum length 00395 inline unsigned Vector<3, real_t>::getMinDimension() const { 00396 unsigned min = 3 - 1; 00397 00398 for(unsigned i = 3 - 1; i--;) 00399 if (data[i] < data[min]) 00400 min = i; 00401 00402 return min; 00403 } 00404 00405 inline Vector3 convertHemisphere(const Vector3 &w, const Vector3 &normal) { 00406 Vector3 U, V, N = normal; 00407 N.getOrthonormalBasis(U, V); 00408 00409 return (U * w[0] + N * w[1] + V * w[2]); 00410 } 00411 00412 #endif // VECTOR3_SSE_INL_ 00413
Generated on 28 Feb 2009 for Milton by
1.5.6