From 64924b954d94338f22d19f3ea1eef701aad64549 Mon Sep 17 00:00:00 2001 From: Luxko Date: Sat, 25 Feb 2017 07:26:11 +0800 Subject: [PATCH 1/6] [WIP]Add basic SIMD support - Add an opt-in SIMD support for the module. The feature requires crate `simd` and specialization, thus can only be enabled under nightly. Under the given benchmark certain operations were able to be up to 60% faster. Currently the supported types as well as operations are highly limited. - Clean up some deadly tests. Also add new tests for SIMD. --- Cargo.toml | 2 + src/lib.rs | 5 +- src/macros.rs | 257 ++++++++++++++++++ src/matrix.rs | 203 ++++++++++++-- src/vector.rs | 628 +++++++++++++++++++++++++++++++++++++++++++- tests/quaternion.rs | 4 +- tests/vectorf32.rs | 267 +++++++++++++++++++ 7 files changed, 1335 insertions(+), 31 deletions(-) create mode 100644 tests/vectorf32.rs diff --git a/Cargo.toml b/Cargo.toml index 12bd240..f470093 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ name = "cgmath" unstable = [] default = ["rustc-serialize"] eders = ["serde", "serde_macros"] +use_simd = ["simd"] [dependencies] approx = "0.1" @@ -38,6 +39,7 @@ rand = "0.3" rustc-serialize = { version = "0.3", optional = true } serde = { version = "0.8", optional = true } serde_macros = { version = "0.8", optional = true } +simd = { version = "0.2", optional = true } [dev-dependencies] glium = "0.15" diff --git a/src/lib.rs b/src/lib.rs index 822857b..8941603 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,9 +49,9 @@ //! ```rust //! use cgmath::prelude::*; //! ``` - #![cfg_attr(feature = "eders", feature(plugin, custom_derive))] #![cfg_attr(feature = "eders", plugin(serde_macros))] +#![cfg_attr(feature = "use_simd", feature(specialization))] #[macro_use] extern crate approx; @@ -64,6 +64,9 @@ extern crate rustc_serialize; #[cfg(feature = "eders")] extern crate serde; +#[cfg(feature = "use_simd")] +extern crate simd; + // Re-exports pub use approx::*; diff --git a/src/macros.rs b/src/macros.rs index d9e54de..ac741e8 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -254,3 +254,260 @@ macro_rules! impl_index_operators { } } } + +#[cfg(feature = "use_simd")] +macro_rules! impl_operator_default { + // When it is an unary operator + (<$S:ident: $Constraint:ident> $Op:ident for $Lhs:ty { + fn $op:ident($x:ident) -> $Output:ty { $body:expr } + }) => { + impl<$S: $Constraint> $Op for $Lhs { + type Output = $Output; + #[inline] + default fn $op(self) -> $Output { + let $x = self; $body + } + } + + impl<'a, $S: $Constraint> $Op for &'a $Lhs { + type Output = $Output; + #[inline] + default fn $op(self) -> $Output { + let $x = self; $body + } + } + }; + // When the right operand is a scalar + (<$S:ident: $Constraint:ident> $Op:ident<$Rhs:ident> for $Lhs:ty { + fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } + }) => { + impl<$S: $Constraint> $Op<$Rhs> for $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + impl<'a, $S: $Constraint> $Op<$Rhs> for &'a $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + }; + // When the right operand is a compound type + (<$S:ident: $Constraint:ident> $Op:ident<$Rhs:ty> for $Lhs:ty { + fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } + }) => { + impl<$S: $Constraint> $Op<$Rhs> for $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + impl<'a, $S: $Constraint> $Op<&'a $Rhs> for $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: &'a $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + impl<'a, $S: $Constraint> $Op<$Rhs> for &'a $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + impl<'a, 'b, $S: $Constraint> $Op<&'a $Rhs> for &'b $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: &'a $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + }; + // When the left operand is a scalar + ($Op:ident<$Rhs:ident<$S:ident>> for $Lhs:ty { + fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } + }) => { + impl $Op<$Rhs<$S>> for $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: $Rhs<$S>) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + impl<'a> $Op<&'a $Rhs<$S>> for $Lhs { + type Output = $Output; + #[inline] + default fn $op(self, other: &'a $Rhs<$S>) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + }; +} + +#[cfg(feature = "use_simd")] +macro_rules! impl_assignment_operator_default { + (<$S:ident: $Constraint:ident> $Op:ident<$Rhs:ty> for $Lhs:ty { + fn $op:ident(&mut $lhs:ident, $rhs:ident) $body:block + }) => { + impl<$S: $Constraint + $Op<$S>> $Op<$Rhs> for $Lhs { + #[inline] + default fn $op(&mut $lhs, $rhs: $Rhs) $body + } + }; +} + +/// Generates a binary operator implementation for the permutations of by-ref and by-val, for simd +#[cfg(feature = "use_simd")] +macro_rules! impl_operator_simd { + // When it is an unary operator + ([$Simd:ident]; $Op:ident for $Lhs:ty { + fn $op:ident($x:ident) -> $Output:ty { $body:expr } + }) => { + + impl $Op for $Lhs { + #[inline] + fn $op(self) -> $Output { + let $x: $Simd = self.into(); $body + } + } + + // #[cfg(feature = "simd")] + // impl<'a> $Op for &'a $Lhs { + // type Output = $Output; + // #[inline] + // fn $op(self) -> $Output { + // let $x: $Simd = (*self).into(); $body + // } + // } + }; + // When the right operand is a scalar + (@rs [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty { + fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } + }) => { + + impl $Op<$Rhs> for $Lhs { + #[inline] + fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), $Simd::splat(other)); $body + } + } + + + impl<'a> $Op<$Rhs> for &'a $Lhs { + #[inline] + fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = ((*self).into(), $Simd::splat(other)); $body + } + } + }; + // When the right operand is a compound type + ([$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty { + fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } + }) => { + + impl $Op<$Rhs> for $Lhs { + #[inline] + fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), other.into()); $body + } + } + + + impl<'a> $Op<&'a $Rhs> for $Lhs { + #[inline] + fn $op(self, other: &'a $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), (*other).into()); $body + } + } + + + impl<'a> $Op<$Rhs> for &'a $Lhs { + #[inline] + fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = ((*self).into(), other.into()); $body + } + } + + + impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs { + #[inline] + fn $op(self, other: &'a $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = ((*self).into(), (*other).into()); $body + } + } + }; + // When the left operand is a scalar + (@ls [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ident { + fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } + }) => { + + impl $Op<$Rhs> for $Lhs { + #[inline] + fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = ($Simd::splat(self), other.into()); $body + } + } + + + impl<'a> $Op<&'a $Rhs> for $Lhs { + #[inline] + fn $op(self, other: &'a $Rhs) -> $Output { + let ($lhs, $rhs): ($Simd, $Simd) = ($Simd::splat(self), (*other).into()); $body + } + } + }; + + // // When left is row-vec, right is colume-matrix + // (@vm [$Simd: ident]; $Op:ident<$Rhs:ty> for $Lhs:ty { + // fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { + + // } + // }) + + // When matrix with matrix + (@mm $Op:ident<$Rhs:ty> for $Lhs:ty { + fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body: expr } + }) => { + impl $Op<$Rhs> for $Lhs { + #[inline] + fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + + impl<'a> $Op<&'a $Rhs> for $Lhs { + #[inline] + fn $op(self, other: &'a $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + + impl<'a> $Op<$Rhs> for &'a $Lhs { + #[inline] + fn $op(self, other: $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + + + impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs { + #[inline] + fn $op(self, other: &'a $Rhs) -> $Output { + let ($lhs, $rhs) = (self, other); $body + } + } + } +} diff --git a/src/matrix.rs b/src/matrix.rs index 6a1e2fd..ccd3b81 100644 --- a/src/matrix.rs +++ b/src/matrix.rs @@ -615,6 +615,7 @@ impl Matrix for Matrix4 { } } +//#[cfg(not(feature = "use_simd"))] impl SquareMatrix for Matrix4 { type ColumnRow = Vector4; @@ -672,7 +673,7 @@ impl SquareMatrix for Matrix4 { } fn invert(&self) -> Option> { - let det = self.determinant(); + let det: S = self.determinant(); if ulps_eq!(det, &S::zero()) { None } else { let inv_det = S::one() / det; let t = self.transpose(); @@ -731,6 +732,123 @@ impl SquareMatrix for Matrix4 { ulps_eq!(self[3][2], &self[2][3]) } } +// #[cfg(feature = "use_simd")] +// impl SquareMatrix for Matrix4 { +// type ColumnRow = Vector4; + +// #[inline] +// default fn from_value(value: S) -> Matrix4 { +// Matrix4::new(value, S::zero(), S::zero(), S::zero(), +// S::zero(), value, S::zero(), S::zero(), +// S::zero(), S::zero(), value, S::zero(), +// S::zero(), S::zero(), S::zero(), value) +// } + +// #[inline] +// default fn from_diagonal(value: Vector4) -> Matrix4 { +// Matrix4::new(value.x, S::zero(), S::zero(), S::zero(), +// S::zero(), value.y, S::zero(), S::zero(), +// S::zero(), S::zero(), value.z, S::zero(), +// S::zero(), S::zero(), S::zero(), value.w) +// } + +// default fn transpose_self(&mut self) { +// self.swap_elements((0, 1), (1, 0)); +// self.swap_elements((0, 2), (2, 0)); +// self.swap_elements((0, 3), (3, 0)); +// self.swap_elements((1, 2), (2, 1)); +// self.swap_elements((1, 3), (3, 1)); +// self.swap_elements((2, 3), (3, 2)); +// } + +// default fn determinant(&self) -> S { +// let m0 = Matrix3::new(self[1][1], self[2][1], self[3][1], +// self[1][2], self[2][2], self[3][2], +// self[1][3], self[2][3], self[3][3]); +// let m1 = Matrix3::new(self[0][1], self[2][1], self[3][1], +// self[0][2], self[2][2], self[3][2], +// self[0][3], self[2][3], self[3][3]); +// let m2 = Matrix3::new(self[0][1], self[1][1], self[3][1], +// self[0][2], self[1][2], self[3][2], +// self[0][3], self[1][3], self[3][3]); +// let m3 = Matrix3::new(self[0][1], self[1][1], self[2][1], +// self[0][2], self[1][2], self[2][2], +// self[0][3], self[1][3], self[2][3]); + +// self[0][0] * m0.determinant() - +// self[1][0] * m1.determinant() + +// self[2][0] * m2.determinant() - +// self[3][0] * m3.determinant() +// } + +// #[inline] +// default fn diagonal(&self) -> Vector4 { +// Vector4::new(self[0][0], +// self[1][1], +// self[2][2], +// self[3][3]) +// } + +// default fn invert(&self) -> Option> { +// let det = self.determinant(); +// if ulps_eq!(det, &S::zero()) { None } else { +// let inv_det = S::one() / det; +// let t = self.transpose(); +// let cf = |i, j| { +// let mat = match i { +// 0 => Matrix3::from_cols(t.y.truncate_n(j), t.z.truncate_n(j), t.w.truncate_n(j)), +// 1 => Matrix3::from_cols(t.x.truncate_n(j), t.z.truncate_n(j), t.w.truncate_n(j)), +// 2 => Matrix3::from_cols(t.x.truncate_n(j), t.y.truncate_n(j), t.w.truncate_n(j)), +// 3 => Matrix3::from_cols(t.x.truncate_n(j), t.y.truncate_n(j), t.z.truncate_n(j)), +// _ => panic!("out of range"), +// }; +// let sign = if (i + j) & 1 == 1 { -S::one() } else { S::one() }; +// mat.determinant() * sign * inv_det +// }; + +// Some(Matrix4::new(cf(0, 0), cf(0, 1), cf(0, 2), cf(0, 3), +// cf(1, 0), cf(1, 1), cf(1, 2), cf(1, 3), +// cf(2, 0), cf(2, 1), cf(2, 2), cf(2, 3), +// cf(3, 0), cf(3, 1), cf(3, 2), cf(3, 3))) +// } +// } + +// default fn is_diagonal(&self) -> bool { +// ulps_eq!(self[0][1], &S::zero()) && +// ulps_eq!(self[0][2], &S::zero()) && +// ulps_eq!(self[0][3], &S::zero()) && + +// ulps_eq!(self[1][0], &S::zero()) && +// ulps_eq!(self[1][2], &S::zero()) && +// ulps_eq!(self[1][3], &S::zero()) && + +// ulps_eq!(self[2][0], &S::zero()) && +// ulps_eq!(self[2][1], &S::zero()) && +// ulps_eq!(self[2][3], &S::zero()) && + +// ulps_eq!(self[3][0], &S::zero()) && +// ulps_eq!(self[3][1], &S::zero()) && +// ulps_eq!(self[3][2], &S::zero()) +// } + +// default fn is_symmetric(&self) -> bool { +// ulps_eq!(self[0][1], &self[1][0]) && +// ulps_eq!(self[0][2], &self[2][0]) && +// ulps_eq!(self[0][3], &self[3][0]) && + +// ulps_eq!(self[1][0], &self[0][1]) && +// ulps_eq!(self[1][2], &self[2][1]) && +// ulps_eq!(self[1][3], &self[3][1]) && + +// ulps_eq!(self[2][0], &self[0][2]) && +// ulps_eq!(self[2][1], &self[1][2]) && +// ulps_eq!(self[2][3], &self[3][2]) && + +// ulps_eq!(self[3][0], &self[0][3]) && +// ulps_eq!(self[3][1], &self[1][3]) && +// ulps_eq!(self[3][2], &self[2][3]) +// } +// } impl ApproxEq for Matrix2 { type Epsilon = S::Epsilon; @@ -955,10 +1073,6 @@ macro_rules! impl_matrix { fn sub_assign(&mut self, other: $MatrixN) { $(self.$field -= other.$field);+ } } - impl_operator!( Mul<$VectorN > for $MatrixN { - fn mul(matrix, vector) -> $VectorN { $VectorN::new($(matrix.row($row_index).dot(vector.clone())),+) } - }); - impl_scalar_ops!($MatrixN { $($field),+ }); impl_scalar_ops!($MatrixN { $($field),+ }); impl_scalar_ops!($MatrixN { $($field),+ }); @@ -1001,6 +1115,25 @@ impl_matrix!(Matrix2, Vector2 { x: 0, y: 1 }); impl_matrix!(Matrix3, Vector3 { x: 0, y: 1, z: 2 }); impl_matrix!(Matrix4, Vector4 { x: 0, y: 1, z: 2, w: 3 }); +macro_rules! impl_mv_operator { + ($MatrixN:ident, $VectorN:ident { $($field:ident : $row_index:expr),+ }) => { + impl_operator!( Mul<$VectorN > for $MatrixN { + fn mul(matrix, vector) -> $VectorN {$VectorN::new($(matrix.row($row_index).dot(vector.clone())),+)} + }); + } +} + +impl_mv_operator!(Matrix2, Vector2 { x: 0, y: 1 }); +impl_mv_operator!(Matrix3, Vector3 { x: 0, y: 1, z: 2 }); +#[cfg(not(feature = "use_simd"))] +impl_mv_operator!(Matrix4, Vector4 { x: 0, y: 1, z: 2, w: 3 }); +#[cfg(feature = "use_simd")] +impl_operator!( Mul > for Matrix4 { + fn mul(matrix, vector) -> Vector4 { + matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2] + matrix[3] * vector[3] + } +}); + impl_operator!( Mul > for Matrix2 { fn mul(lhs, rhs) -> Matrix2 { Matrix2::new(lhs.row(0).dot(rhs[0]), lhs.row(1).dot(rhs[0]), @@ -1020,21 +1153,21 @@ impl_operator!( Mul > for Matrix3 { // causes the LLVM to miss identical loads and multiplies. This optimization // causes the code to be auto vectorized properly increasing the performance // around ~4 times. -macro_rules! dot_matrix4 { - ($A:expr, $B:expr, $I:expr, $J:expr) => { - ($A[0][$I]) * ($B[$J][0]) + - ($A[1][$I]) * ($B[$J][1]) + - ($A[2][$I]) * ($B[$J][2]) + - ($A[3][$I]) * ($B[$J][3]) - }; -} impl_operator!( Mul > for Matrix4 { fn mul(lhs, rhs) -> Matrix4 { - Matrix4::new(dot_matrix4!(lhs, rhs, 0, 0), dot_matrix4!(lhs, rhs, 1, 0), dot_matrix4!(lhs, rhs, 2, 0), dot_matrix4!(lhs, rhs, 3, 0), - dot_matrix4!(lhs, rhs, 0, 1), dot_matrix4!(lhs, rhs, 1, 1), dot_matrix4!(lhs, rhs, 2, 1), dot_matrix4!(lhs, rhs, 3, 1), - dot_matrix4!(lhs, rhs, 0, 2), dot_matrix4!(lhs, rhs, 1, 2), dot_matrix4!(lhs, rhs, 2, 2), dot_matrix4!(lhs, rhs, 3, 2), - dot_matrix4!(lhs, rhs, 0, 3), dot_matrix4!(lhs, rhs, 1, 3), dot_matrix4!(lhs, rhs, 2, 3), dot_matrix4!(lhs, rhs, 3, 3)) + { + let a = lhs[0]; + let b = lhs[1]; + let c = lhs[2]; + let d = lhs[3]; + Matrix4::from_cols( + a*rhs[0][0] + b*rhs[0][1] + c*rhs[0][2] + d*rhs[0][3], + a*rhs[1][0] + b*rhs[1][1] + c*rhs[1][2] + d*rhs[1][3], + a*rhs[2][0] + b*rhs[2][1] + c*rhs[2][2] + d*rhs[2][3], + a*rhs[3][0] + b*rhs[3][1] + c*rhs[3][2] + d*rhs[3][3], + ) + } } }); @@ -1318,3 +1451,39 @@ impl Rand for Matrix4 { Matrix4{ x: rng.gen(), y: rng.gen(), z: rng.gen(), w: rng.gen() } } } + +// Sadly buggy. +// #[cfg(feature = "use_simd")] +// impl SquareMatrix for Matrix4 { +// fn determinant(&self) -> f32 { +// let a = Simdf32x4::new(self.z[1], self.x[1], self.w[1], self.y[1]); +// let b = Simdf32x4::new(self.y[2], self.y[2], self.z[2], self.z[2]); +// let c = Simdf32x4::new(self.x[3], self.z[3], self.x[3], self.z[3]); +// let mut tmp = a * (b * c); +// let d = Simdf32x4::new(self.y[1], self.y[1], self.z[1], self.z[1]); +// let e = Simdf32x4::new(self.x[2], self.z[2], self.x[2], self.z[2]); +// let f = Simdf32x4::new(self.z[3], self.x[3], self.w[3], self.y[3]); +// let tmp1 = d * (e * f); +// tmp = tmp + tmp1; +// let g = Simdf32x4::new(self.x[1], self.z[1], self.x[1], self.z[1]); +// let h = Simdf32x4::new(self.z[2], self.x[2], self.w[2], self.y[2]); +// let i = Simdf32x4::new(self.y[3], self.y[3], self.z[3], self.z[3]); +// let tmp1 = g * (h * i); +// tmp = tmp + tmp1; +// let tmp1 = g * (b * f); +// tmp = tmp - tmp1; +// let tmp1 = d * (h * c); +// tmp = tmp - tmp1; +// let tmp1 = a * (e * i); +// tmp = tmp - tmp1; +// let tmp: Vector4 = (tmp * Simdf32x4::new(self.x[0], self.y[0], self.z[0], self.w[0])).into(); +// tmp.sum() +// } +// } + +// #[cfg(feature = "use_simd")] +// impl_operator_simd!(@mm Mul> for Matrix4 { +// fn mul(matrix, vector) -> Vector4 { +// matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2] + matrix[3] * vector[3] +// } +// }); \ No newline at end of file diff --git a/src/vector.rs b/src/vector.rs index dbab97b..8e37c83 100644 --- a/src/vector.rs +++ b/src/vector.rs @@ -25,6 +25,13 @@ use angle::Rad; use approx::ApproxEq; use num::{BaseNum, BaseFloat, PartialOrd}; +#[cfg(feature = "use_simd")] +use simd::f32x4 as Simdf32x4; +#[cfg(feature = "use_simd")] +use simd::i32x4 as Simdi32x4; +#[cfg(feature = "use_simd")] +use simd::u32x4 as Simdu32x4; + /// A 1-dimensional vector. /// /// This type is marked as `#[repr(C)]`. @@ -291,6 +298,217 @@ macro_rules! impl_vector { } } +// Utility macro for generating associated functions for the vectors +#[cfg(feature = "use_simd")] +macro_rules! impl_vector_default { + ($VectorN:ident { $($field:ident),+ }, $n:expr, $constructor:ident) => { + impl $VectorN { + /// Construct a new vector, using the provided values. + #[inline] + pub fn new($($field: S),+) -> $VectorN { + $VectorN { $($field: $field),+ } + } + } + + /// The short constructor. + #[inline] + pub fn $constructor($($field: S),+) -> $VectorN { + $VectorN::new($($field),+) + } + + impl $VectorN { + /// Component-wise casting to another type + #[inline] + pub fn cast(&self) -> $VectorN { + $VectorN { $($field: NumCast::from(self.$field).unwrap()),+ } + } + } + + impl MetricSpace for $VectorN { + type Metric = S; + + #[inline] + fn distance2(self, other: Self) -> S { + (other - self).magnitude2() + } + } + + impl Array for $VectorN { + type Element = S; + + #[inline] + fn from_value(scalar: S) -> $VectorN { + $VectorN { $($field: scalar),+ } + } + + #[inline] + fn sum(self) -> S where S: Add { + fold_array!(add, { $(self.$field),+ }) + } + + #[inline] + fn product(self) -> S where S: Mul { + fold_array!(mul, { $(self.$field),+ }) + } + + #[inline] + fn min(self) -> S where S: PartialOrd { + fold_array!(partial_min, { $(self.$field),+ }) + } + + #[inline] + fn max(self) -> S where S: PartialOrd { + fold_array!(partial_max, { $(self.$field),+ }) + } + } + + impl Zero for $VectorN { + #[inline] + fn zero() -> $VectorN { + $VectorN::from_value(S::zero()) + } + + #[inline] + fn is_zero(&self) -> bool { + *self == $VectorN::zero() + } + } + + impl VectorSpace for $VectorN { + type Scalar = S; + } + + impl> Neg for $VectorN { + type Output = $VectorN; + + #[inline] + default fn neg(self) -> $VectorN { $VectorN::new($(-self.$field),+) } + } + + impl ApproxEq for $VectorN { + type Epsilon = S::Epsilon; + + #[inline] + fn default_epsilon() -> S::Epsilon { + S::default_epsilon() + } + + #[inline] + fn default_max_relative() -> S::Epsilon { + S::default_max_relative() + } + + #[inline] + fn default_max_ulps() -> u32 { + S::default_max_ulps() + } + + #[inline] + fn relative_eq(&self, other: &Self, epsilon: S::Epsilon, max_relative: S::Epsilon) -> bool { + $(S::relative_eq(&self.$field, &other.$field, epsilon, max_relative))&&+ + } + + #[inline] + fn ulps_eq(&self, other: &Self, epsilon: S::Epsilon, max_ulps: u32) -> bool { + $(S::ulps_eq(&self.$field, &other.$field, epsilon, max_ulps))&&+ + } + } + + impl Rand for $VectorN { + #[inline] + fn rand(rng: &mut R) -> $VectorN { + $VectorN { $($field: rng.gen()),+ } + } + } + + impl_operator_default!( Add<$VectorN > for $VectorN { + fn add(lhs, rhs) -> $VectorN { $VectorN::new($(lhs.$field + rhs.$field),+) } + }); + + impl_assignment_operator_default!( AddAssign<$VectorN > for $VectorN { + fn add_assign(&mut self, other) { $(self.$field += other.$field);+ } + }); + + impl_operator_default!( Sub<$VectorN > for $VectorN { + fn sub(lhs, rhs) -> $VectorN { $VectorN::new($(lhs.$field - rhs.$field),+) } + }); + + impl_assignment_operator_default!( SubAssign<$VectorN > for $VectorN { + fn sub_assign(&mut self, other) { $(self.$field -= other.$field);+ } + }); + + impl_operator_default!( Mul for $VectorN { + fn mul(vector, scalar) -> $VectorN { $VectorN::new($(vector.$field * scalar),+) } + }); + + impl_assignment_operator_default!( MulAssign for $VectorN { + fn mul_assign(&mut self, scalar) { $(self.$field *= scalar);+ } + }); + + impl_operator_default!( Div for $VectorN { + fn div(vector, scalar) -> $VectorN { $VectorN::new($(vector.$field / scalar),+) } + }); + + impl_assignment_operator_default!( DivAssign for $VectorN { + fn div_assign(&mut self, scalar) { $(self.$field /= scalar);+ } + }); + + impl_operator!( Rem for $VectorN { + fn rem(vector, scalar) -> $VectorN { $VectorN::new($(vector.$field % scalar),+) } + }); + impl_assignment_operator!( RemAssign for $VectorN { + fn rem_assign(&mut self, scalar) { $(self.$field %= scalar);+ } + }); + + impl ElementWise for $VectorN { + #[inline] default fn add_element_wise(self, rhs: $VectorN) -> $VectorN { $VectorN::new($(self.$field + rhs.$field),+) } + #[inline] default fn sub_element_wise(self, rhs: $VectorN) -> $VectorN { $VectorN::new($(self.$field - rhs.$field),+) } + #[inline] default fn mul_element_wise(self, rhs: $VectorN) -> $VectorN { $VectorN::new($(self.$field * rhs.$field),+) } + #[inline] default fn div_element_wise(self, rhs: $VectorN) -> $VectorN { $VectorN::new($(self.$field / rhs.$field),+) } + #[inline] fn rem_element_wise(self, rhs: $VectorN) -> $VectorN { $VectorN::new($(self.$field % rhs.$field),+) } + + #[inline] default fn add_assign_element_wise(&mut self, rhs: $VectorN) { $(self.$field += rhs.$field);+ } + #[inline] default fn sub_assign_element_wise(&mut self, rhs: $VectorN) { $(self.$field -= rhs.$field);+ } + #[inline] default fn mul_assign_element_wise(&mut self, rhs: $VectorN) { $(self.$field *= rhs.$field);+ } + #[inline] default fn div_assign_element_wise(&mut self, rhs: $VectorN) { $(self.$field /= rhs.$field);+ } + #[inline] fn rem_assign_element_wise(&mut self, rhs: $VectorN) { $(self.$field %= rhs.$field);+ } + } + + impl ElementWise for $VectorN { + #[inline] default fn add_element_wise(self, rhs: S) -> $VectorN { $VectorN::new($(self.$field + rhs),+) } + #[inline] default fn sub_element_wise(self, rhs: S) -> $VectorN { $VectorN::new($(self.$field - rhs),+) } + #[inline] default fn mul_element_wise(self, rhs: S) -> $VectorN { $VectorN::new($(self.$field * rhs),+) } + #[inline] default fn div_element_wise(self, rhs: S) -> $VectorN { $VectorN::new($(self.$field / rhs),+) } + #[inline] fn rem_element_wise(self, rhs: S) -> $VectorN { $VectorN::new($(self.$field % rhs),+) } + + #[inline] default fn add_assign_element_wise(&mut self, rhs: S) { $(self.$field += rhs);+ } + #[inline] default fn sub_assign_element_wise(&mut self, rhs: S) { $(self.$field -= rhs);+ } + #[inline] default fn mul_assign_element_wise(&mut self, rhs: S) { $(self.$field *= rhs);+ } + #[inline] default fn div_assign_element_wise(&mut self, rhs: S) { $(self.$field /= rhs);+ } + #[inline] fn rem_assign_element_wise(&mut self, rhs: S) { $(self.$field %= rhs);+ } + } + + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops_default!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops_default!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + impl_scalar_ops_default!($VectorN { $($field),+ }); + impl_scalar_ops!($VectorN { $($field),+ }); + + impl_index_operators!($VectorN, $n, S, usize); + impl_index_operators!($VectorN, $n, [S], Range); + impl_index_operators!($VectorN, $n, [S], RangeTo); + impl_index_operators!($VectorN, $n, [S], RangeFrom); + impl_index_operators!($VectorN, $n, [S], RangeFull); + } +} + macro_rules! impl_scalar_ops { ($VectorN:ident<$S:ident> { $($field:ident),+ }) => { impl_operator!(Mul<$VectorN<$S>> for $S { @@ -305,10 +523,28 @@ macro_rules! impl_scalar_ops { }; } +#[cfg(feature = "use_simd")] +macro_rules! impl_scalar_ops_default { + ($VectorN:ident<$S:ident> { $($field:ident),+ }) => { + impl_operator_default!(Mul<$VectorN<$S>> for $S { + fn mul(scalar, vector) -> $VectorN<$S> { $VectorN::new($(scalar * vector.$field),+) } + }); + impl_operator_default!(Div<$VectorN<$S>> for $S { + fn div(scalar, vector) -> $VectorN<$S> { $VectorN::new($(scalar / vector.$field),+) } + }); + impl_operator_default!(Rem<$VectorN<$S>> for $S { + fn rem(scalar, vector) -> $VectorN<$S> { $VectorN::new($(scalar % vector.$field),+) } + }); + }; +} + impl_vector!(Vector1 { x }, 1, vec1); impl_vector!(Vector2 { x, y }, 2, vec2); impl_vector!(Vector3 { x, y, z }, 3, vec3); +#[cfg(not(feature = "use_simd"))] impl_vector!(Vector4 { x, y, z, w }, 4, vec4); +#[cfg(feature = "use_simd")] +impl_vector_default!(Vector4 { x, y, z, w }, 4, vec4); impl_fixed_array_conversions!(Vector1 { x: 0 }, 1); impl_fixed_array_conversions!(Vector2 { x: 0, y: 1 }, 2); @@ -350,7 +586,7 @@ impl Vector2 { /// Create a `Vector3`, using the `x` and `y` values from this vector, and the /// provided `z`. #[inline] - pub fn extend(self, z: S)-> Vector3 { + pub fn extend(self, z: S) -> Vector3 { Vector3::new(self.x, self.y, z) } } @@ -386,13 +622,13 @@ impl Vector3 { /// Create a `Vector4`, using the `x`, `y` and `z` values from this vector, and the /// provided `w`. #[inline] - pub fn extend(self, w: S)-> Vector4 { + pub fn extend(self, w: S) -> Vector4 { Vector4::new(self.x, self.y, self.z, w) } /// Create a `Vector2`, dropping the `z` value. #[inline] - pub fn truncate(self)-> Vector2 { + pub fn truncate(self) -> Vector2 { Vector2::new(self.x, self.y) } } @@ -424,27 +660,27 @@ impl Vector4 { /// Create a `Vector3`, dropping the `w` value. #[inline] - pub fn truncate(self)-> Vector3 { + pub fn truncate(self) -> Vector3 { Vector3::new(self.x, self.y, self.z) } /// Create a `Vector3`, dropping the nth element #[inline] - pub fn truncate_n(&self, n: isize)-> Vector3 { + pub fn truncate_n(&self, n: isize) -> Vector3 { match n { 0 => Vector3::new(self.y, self.z, self.w), 1 => Vector3::new(self.x, self.z, self.w), 2 => Vector3::new(self.x, self.y, self.w), 3 => Vector3::new(self.x, self.y, self.z), - _ => panic!("{:?} is out of range", n) + _ => panic!("{:?} is out of range", n), } } } /// Dot product of two vectors. #[inline] -pub fn dot(a: V, b: V) -> V::Scalar where - V::Scalar: BaseFloat, +pub fn dot(a: V, b: V) -> V::Scalar + where V::Scalar: BaseFloat { V::dot(a, b) } @@ -515,6 +751,371 @@ impl fmt::Debug for Vector4 { } } +#[cfg(feature = "use_simd")] +impl From for Vector4 { + #[inline] + fn from(f: Simdf32x4) -> Self { + unsafe { + let mut ret: Self = mem::uninitialized(); + { + let ret_mut: &mut [f32; 4] = ret.as_mut(); + f.store(ret_mut.as_mut(), 0 as usize); + } + ret + } + } +} + +#[cfg(feature = "use_simd")] +impl Vector4 { + /// Compute and return the square root of each element. + #[inline] + pub fn sqrt_element_wide(self) -> Self { + let s: Simdf32x4 = self.into(); + s.sqrt().into() + } + + /// Compute and return the reciprocal of the square root of each element. + #[inline] + pub fn rsqrt_element_wide(self) -> Self { + let s: Simdf32x4 = self.into(); + s.approx_rsqrt().into() + } + + /// Compute and return the reciprocal of each element. + #[inline] + pub fn recip_element_wide(self) -> Self { + let s: Simdf32x4 = self.into(); + s.approx_reciprocal().into() + } +} + + + +#[cfg(feature = "use_simd")] +impl Into for Vector4 { + #[inline] + fn into(self) -> Simdf32x4 { + let self_ref: &[f32; 4] = self.as_ref(); + Simdf32x4::load(self_ref.as_ref(), 0 as usize) + } +} + + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Sub> for Vector4 { + fn sub(lhs, rhs) -> Vector4 { + (lhs - rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{@rs + [Simdf32x4]; Mul for Vector4 { + fn mul(lhs, rhs) -> Vector4 { + (lhs * rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{@rs + [Simdf32x4]; Div for Vector4 { + fn div(lhs, rhs) -> Vector4 { + (lhs / rhs).into() + } + } +} + + + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Neg for Vector4 { + fn neg(lhs) -> Vector4 { + (-lhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl AddAssign for Vector4 { + #[inline] + fn add_assign(&mut self, rhs: Self) { + let s: Simdf32x4 = (*self).into(); + let rhs: Simdf32x4 = rhs.into(); + *self = (s + rhs).into(); + } +} + +#[cfg(feature = "use_simd")] +impl SubAssign for Vector4 { + #[inline] + fn sub_assign(&mut self, rhs: Self) { + let s: Simdf32x4 = (*self).into(); + let rhs: Simdf32x4 = rhs.into(); + *self = (s - rhs).into(); + } +} + +#[cfg(feature = "use_simd")] +impl MulAssign for Vector4 { + fn mul_assign(&mut self, other: f32) { + let s: Simdf32x4 = (*self).into(); + let other = Simdf32x4::splat(other); + *self = (s * other).into(); + } +} + +#[cfg(feature = "use_simd")] +impl DivAssign for Vector4 { + fn div_assign(&mut self, other: f32) { + let s: Simdf32x4 = (*self).into(); + let other = Simdf32x4::splat(other); + *self = (s / other).into(); + } +} + +#[cfg(feature = "use_simd")] +impl ElementWise for Vector4 { + #[inline] fn add_element_wise(self, rhs: Vector4) -> Vector4 { self + rhs } + #[inline] fn sub_element_wise(self, rhs: Vector4) -> Vector4 { self - rhs } + #[inline] fn mul_element_wise(self, rhs: Vector4) -> Vector4 { + let s: Simdf32x4 = self.into(); + let rhs: Simdf32x4 = rhs.into(); + (s * rhs).into() + } + #[inline] fn div_element_wise(self, rhs: Vector4) -> Vector4 { + let s: Simdf32x4 = self.into(); + let rhs: Simdf32x4 = rhs.into(); + (s / rhs).into() + } + + #[inline] fn add_assign_element_wise(&mut self, rhs: Vector4) { (*self) += rhs; } + #[inline] fn sub_assign_element_wise(&mut self, rhs: Vector4) { (*self) -= rhs; } + #[inline] fn mul_assign_element_wise(&mut self, rhs: Vector4) { + let s: Simdf32x4 = (*self).into(); + let rhs: Simdf32x4 = rhs.into(); + *self = (s * rhs).into(); + } + #[inline] fn div_assign_element_wise(&mut self, rhs: Vector4) { + let s: Simdf32x4 = (*self).into(); + let rhs: Simdf32x4 = rhs.into(); + *self = (s * rhs).into(); + } +} + +#[cfg(feature = "use_simd")] +impl ElementWise for Vector4 { + #[inline] fn add_element_wise(self, rhs: f32) -> Vector4 { + let s: Simdf32x4 = self.into(); + let rhs = Simdf32x4::splat(rhs); + (s + rhs).into() + } + #[inline] fn sub_element_wise(self, rhs: f32) -> Vector4 { + let s: Simdf32x4 = self.into(); + let rhs = Simdf32x4::splat(rhs); + (s - rhs).into() + } + #[inline] fn mul_element_wise(self, rhs: f32) -> Vector4 { self * rhs } + #[inline] fn div_element_wise(self, rhs: f32) -> Vector4 { self / rhs } + + #[inline] fn add_assign_element_wise(&mut self, rhs: f32) { + let s: Simdf32x4 = (*self).into(); + let rhs = Simdf32x4::splat(rhs); + *self = (s + rhs).into(); + } + #[inline] fn sub_assign_element_wise(&mut self, rhs: f32) { + let s: Simdf32x4 = (*self).into(); + let rhs = Simdf32x4::splat(rhs); + *self = (s - rhs).into(); + } + #[inline] fn mul_assign_element_wise(&mut self, rhs: f32) { (*self) *= rhs; } + #[inline] fn div_assign_element_wise(&mut self, rhs: f32) { (*self) /= rhs; } +} + +#[cfg(feature = "use_simd")] +impl From for Vector4 { + #[inline] + fn from(f: Simdi32x4) -> Self { + unsafe { + let mut ret: Self = mem::uninitialized(); + { + let ret_mut: &mut [i32; 4] = ret.as_mut(); + f.store(ret_mut.as_mut(), 0 as usize); + } + ret + } + } +} + +#[cfg(feature = "use_simd")] +impl Into for Vector4 { + #[inline] + fn into(self) -> Simdi32x4 { + let self_ref: &[i32; 4] = self.as_ref(); + Simdi32x4::load(self_ref.as_ref(), 0 as usize) + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdi32x4]; Add> for Vector4 { + fn add(lhs, rhs) -> Vector4 { + (lhs + rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdi32x4]; Sub> for Vector4 { + fn sub(lhs, rhs) -> Vector4 { + (lhs - rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{@rs + [Simdi32x4]; Mul for Vector4 { + fn mul(lhs, rhs) -> Vector4 { + (lhs * rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdi32x4]; Neg for Vector4 { + fn neg(lhs) -> Vector4 { + (-lhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl AddAssign for Vector4 { + #[inline] + fn add_assign(&mut self, rhs: Self) { + let s: Simdi32x4 = (*self).into(); + let rhs: Simdi32x4 = rhs.into(); + *self = (s + rhs).into(); + } +} + +#[cfg(feature = "use_simd")] +impl SubAssign for Vector4 { + #[inline] + fn sub_assign(&mut self, rhs: Self) { + let s: Simdi32x4 = (*self).into(); + let rhs: Simdi32x4 = rhs.into(); + *self = (s - rhs).into(); + } +} + +#[cfg(feature = "use_simd")] +impl MulAssign for Vector4 { + fn mul_assign(&mut self, other: i32) { + let s: Simdi32x4 = (*self).into(); + let other = Simdi32x4::splat(other); + *self = (s * other).into(); + } +} + +#[cfg(feature = "use_simd")] +impl From for Vector4 { + #[inline] + fn from(f: Simdu32x4) -> Self { + unsafe { + let mut ret: Self = mem::uninitialized(); + { + let ret_mut: &mut [u32; 4] = ret.as_mut(); + f.store(ret_mut.as_mut(), 0 as usize); + } + ret + } + } +} + +#[cfg(feature = "use_simd")] +impl Into for Vector4 { + #[inline] + fn into(self) -> Simdu32x4 { + let self_ref: &[u32; 4] = self.as_ref(); + Simdu32x4::load(self_ref.as_ref(), 0 as usize) + } +} + + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdu32x4]; Add> for Vector4 { + fn add(lhs, rhs) -> Vector4 { + (lhs + rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Add> for Vector4 { + fn add(lhs, rhs) -> Vector4 { + (lhs + rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdu32x4]; Sub> for Vector4 { + fn sub(lhs, rhs) -> Vector4 { + (lhs - rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl_operator_simd!{@rs + [Simdu32x4]; Mul for Vector4 { + fn mul(lhs, rhs) -> Vector4 { + (lhs * rhs).into() + } + } +} + +#[cfg(feature = "use_simd")] +impl AddAssign for Vector4 { + #[inline] + fn add_assign(&mut self, rhs: Self) { + let s: Simdu32x4 = (*self).into(); + let rhs: Simdu32x4 = rhs.into(); + *self = (s + rhs).into(); + } +} + +#[cfg(feature = "use_simd")] +impl SubAssign for Vector4 { + #[inline] + fn sub_assign(&mut self, rhs: Self) { + let s: Simdu32x4 = (*self).into(); + let rhs: Simdu32x4 = rhs.into(); + *self = (s - rhs).into(); + } +} + +#[cfg(feature = "use_simd")] +impl MulAssign for Vector4 { + fn mul_assign(&mut self, other: u32) { + let s: Simdu32x4 = (*self).into(); + let other = Simdu32x4::splat(other); + *self = (s * other).into(); + } +} + + #[cfg(test)] mod tests { mod vector2 { @@ -729,7 +1330,12 @@ mod tests { mod vector4 { use vector::*; - const VECTOR4: Vector4 = Vector4 { x: 1, y: 2, z: 3, w: 4 }; + const VECTOR4: Vector4 = Vector4 { + x: 1, + y: 2, + z: 3, + w: 4, + }; #[test] fn test_index() { @@ -796,11 +1402,11 @@ mod tests { fn test_as_mut() { let mut v = VECTOR4; { - let v: &mut[i32; 4] = v.as_mut(); + let v: &mut [i32; 4] = v.as_mut(); assert_eq!(v, &mut [1, 2, 3, 4]); } { - let v: &mut(i32, i32, i32, i32) = v.as_mut(); + let v: &mut (i32, i32, i32, i32) = v.as_mut(); assert_eq!(v, &mut (1, 2, 3, 4)); } } diff --git a/tests/quaternion.rs b/tests/quaternion.rs index f59911d..5b00619 100644 --- a/tests/quaternion.rs +++ b/tests/quaternion.rs @@ -194,13 +194,13 @@ mod rotate_from_euler { let vec = vec3(0.0, 1.0, 0.0); let rot = Quaternion::from(Euler::new(Deg(90.0), Deg(90.0), Deg(0.0))); - assert_ulps_eq!(vec3(0.0, 0.0, 1.0), rot * vec); + assert_ulps_eq!(vec3(0.0f32, 0.0f32, 1.0f32), rot * vec); } // tests that the Z rotation is done after the Y #[test] fn test_y_then_z() { - let vec = vec3(0.0, 0.0, 1.0); + let vec = vec3(0.0f32, 0.0f32, 1.0f32); let rot = Quaternion::from(Euler::new(Deg(0.0), Deg(90.0), Deg(90.0))); assert_ulps_eq!(vec3(1.0, 0.0, 0.0), rot * vec); diff --git a/tests/vectorf32.rs b/tests/vectorf32.rs new file mode 100644 index 0000000..fd930e8 --- /dev/null +++ b/tests/vectorf32.rs @@ -0,0 +1,267 @@ +// Copyright 2013-2014 The CGMath Developers. For a full listing of the authors, +// refer to the Cargo.toml file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0f32 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0f32 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[macro_use] +extern crate approx; +#[macro_use] +extern crate cgmath; + +use cgmath::*; +use std::f32; + +#[test] +fn test_constructor() { + assert_eq!(vec2(1f32, 2f32), Vector2::new(1f32, 2f32)); + assert_eq!(vec3(1f32, 2f32, 3f32), Vector3::new(1f32, 2f32, 3f32)); + assert_eq!(vec4(1f32, 2f32, 3f32, 4f32), Vector4::new(1f32, 2f32, 3f32, 4f32)); +} + +#[test] +fn test_from_value() { + assert_eq!(Vector2::from_value(102f32), Vector2::new(102f32, 102f32)); + assert_eq!(Vector3::from_value(22f32), Vector3::new(22f32, 22f32, 22f32)); + assert_eq!(Vector4::from_value(76.5f32), Vector4::new(76.5f32, 76.5f32, 76.5f32, 76.5f32)); +} + +macro_rules! impl_test_add { + ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => ( + // vector + vector ops + assert_eq!($v + $v, $VectorN::new($($v.$field + $v.$field),+)); + assert_eq!(&$v + &$v, $v + $v); + assert_eq!(&$v + $v, $v + $v); + assert_eq!($v + &$v, $v + $v); + ) +} + +macro_rules! impl_test_sub { + ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => ( + // vector - vector ops + assert_eq!($v - $v, $VectorN::new($($v.$field - $v.$field),+)); + assert_eq!(&$v - &$v, $v - $v); + assert_eq!(&$v - $v, $v - $v); + assert_eq!($v - &$v, $v - $v); + ) +} + +macro_rules! impl_test_mul { + ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => ( + // vector * scalar ops + assert_eq!($v * $s, $VectorN::new($($v.$field * $s),+)); + assert_eq!($s * $v, $VectorN::new($($s * $v.$field),+)); + assert_eq!(&$v * $s, $v * $s); + assert_eq!($s * &$v, $s * $v); + // commutativity + assert_eq!($v * $s, $s * $v); + ) +} + +macro_rules! impl_test_div { + ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => ( + // vector / scalar ops + assert_eq!($v / $s, $VectorN::new($($v.$field / $s),+)); + assert_eq!($s / $v, $VectorN::new($($s / $v.$field),+)); + assert_eq!(&$v / $s, $v / $s); + assert_eq!($s / &$v, $s / $v); + ) +} + +macro_rules! impl_test_rem { + ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => ( + // vector % scalar ops + assert_eq!($v % $s, $VectorN::new($($v.$field % $s),+)); + assert_eq!($s % $v, $VectorN::new($($s % $v.$field),+)); + assert_eq!(&$v % $s, $v % $s); + assert_eq!($s % &$v, $s % $v); + ) +} + +#[test] +fn test_add() { + impl_test_add!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); +} + +#[test] +fn test_sub() { + impl_test_sub!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); + impl_test_sub!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); + impl_test_sub!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); +} + +#[test] +fn test_mul() { + impl_test_mul!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); + impl_test_mul!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); + impl_test_mul!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); +} + +#[test] +fn test_div() { + impl_test_div!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); + impl_test_div!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); + impl_test_div!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); +} + +#[test] +fn test_rem() { + impl_test_rem!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); + impl_test_rem!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); + impl_test_rem!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); +} + +#[test] +fn test_dot() { + assert_eq!(Vector2::new(1.0f32, 2.0f32).dot(Vector2::new(3.0f32, 4.0f32)), 11.0f32); + assert_eq!(Vector3::new(1.0f32, 2.0f32, 3.0f32).dot(Vector3::new(4.0f32, 5.0f32, 6.0f32)), 32.0f32); + assert_eq!(Vector4::new(1.0f32, 2.0f32, 3.0f32, 4.0f32).dot(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32)), 70.0f32); +} + +#[test] +fn test_sum() { + assert_eq!(Vector2::new(1f32, 2f32).sum(), 3f32); + assert_eq!(Vector3::new(1f32, 2f32, 3f32).sum(), 6f32); + assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).sum(), 10f32); + + assert_eq!(Vector2::new(3.0f32, 4.0f32).sum(), 7.0f32); + assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).sum(), 15.0f32); + assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).sum(), 26.0f32); +} + +#[test] +fn test_product() { + assert_eq!(Vector2::new(1f32, 2f32).product(), 2f32); + assert_eq!(Vector3::new(1f32, 2f32, 3f32).product(), 6f32); + assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).product(), 24f32); + + assert_eq!(Vector2::new(3.0f32, 4.0f32).product(), 12.0f32); + assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).product(), 120.0f32); + assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).product(), 1680.0f32); +} + +#[test] +fn test_min() { + assert_eq!(Vector2::new(1f32, 2f32).min(), 1f32); + assert_eq!(Vector3::new(1f32, 2f32, 3f32).min(), 1f32); + assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).min(), 1f32); + + assert_eq!(Vector2::new(3.0f32, 4.0f32).min(), 3.0f32); + assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).min(), 4.0f32); + assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).min(), 5.0f32); +} + +#[test] +fn test_max() { + assert_eq!(Vector2::new(1f32, 2f32).max(), 2f32); + assert_eq!(Vector3::new(1f32, 2f32, 3f32).max(), 3f32); + assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).max(), 4f32); + + assert_eq!(Vector2::new(3.0f32, 4.0f32).max(), 4.0f32); + assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).max(), 6.0f32); + assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).max(), 8.0f32); +} + +#[test] +fn test_cross() { + let a = Vector3::new(1f32, 2f32, 3f32); + let b = Vector3::new(4f32, 5f32, 6f32); + let r = Vector3::new(-3f32, 6f32, -3f32); + assert_eq!(a.cross(b), r); +} + +#[test] +fn test_is_perpendicular() { + assert!(Vector2::new(1.0f32, 0.0f32).is_perpendicular(Vector2::new(0.0f32, 1.0f32))); + assert!(Vector3::new(0.0f32, 1.0f32, 0.0f32).is_perpendicular(Vector3::new(0.0f32, 0.0f32, 1.0f32))); + assert!(Vector4::new(1.0f32, 0.0f32, 0.0f32, 0.0f32).is_perpendicular(Vector4::new(0.0f32, 0.0f32, 0.0f32, 1.0f32))); +} + +#[cfg(test)] +mod test_magnitude { + use cgmath::*; + + #[test] + fn test_vector2(){ + let (a, a_res) = (Vector2::new(3.0f32, 4.0f32), 5.0f32); // (3, 4, 5) Pythagorean triple + let (b, b_res) = (Vector2::new(5.0f32, 12.0f32), 13.0f32); // (5, 12, 13) Pythagorean triple + + assert_eq!(a.magnitude2(), a_res * a_res); + assert_eq!(b.magnitude2(), b_res * b_res); + + assert_eq!(a.magnitude(), a_res); + assert_eq!(b.magnitude(), b_res); + } + + #[test] + fn test_vector3(){ + let (a, a_res) = (Vector3::new(2.0f32, 3.0f32, 6.0f32), 7.0f32); // (2, 3, 6, 7) Pythagorean quadruple + let (b, b_res) = (Vector3::new(1.0f32, 4.0f32, 8.0f32), 9.0f32); // (1, 4, 8, 9) Pythagorean quadruple + + assert_eq!(a.magnitude2(), a_res * a_res); + assert_eq!(b.magnitude2(), b_res * b_res); + + assert_eq!(a.magnitude(), a_res); + assert_eq!(b.magnitude(), b_res); + } + + #[test] + fn test_vector4(){ + let (a, a_res) = (Vector4::new(1.0f32, 2.0f32, 4.0f32, 10.0f32), 11.0f32); // (1, 2, 4, 10, 11) Pythagorean quintuple + let (b, b_res) = (Vector4::new(1.0f32, 2.0f32, 8.0f32, 10.0f32), 13.0f32); // (1, 2, 8, 10, 13) Pythagorean quintuple + + assert_eq!(a.magnitude2(), a_res * a_res); + assert_eq!(b.magnitude2(), b_res * b_res); + + assert_eq!(a.magnitude(), a_res); + assert_eq!(b.magnitude(), b_res); + + #[cfg(feature = "use_simd")] + { + let a = Vector4::new(1f32, 4f32, 9f32, 16f32); + assert_ulps_eq!(a.sqrt_element_wide(), Vector4::new(1f32, 2f32, 3f32, 4f32)); + assert_relative_eq!(a.sqrt_element_wide().recip_element_wide(), Vector4::new(1f32, 1f32/2f32, 1f32/3f32, 1f32/4f32), max_relative = 0.005f32); + assert_relative_eq!(a.rsqrt_element_wide(), Vector4::new(1f32, 1f32/2f32, 1f32/3f32, 1f32/4f32), max_relative= 0.005f32); + } + + } +} + +#[test] +fn test_angle() { + assert_ulps_eq!(Vector2::new(1.0f32, 0.0f32).angle(Vector2::new(0.0f32, 1.0f32)), &Rad(f32::consts::FRAC_PI_2)); + assert_ulps_eq!(Vector2::new(10.0f32, 0.0f32).angle(Vector2::new(0.0f32, 5.0f32)), &Rad(f32::consts::FRAC_PI_2)); + assert_ulps_eq!(Vector2::new(-1.0f32, 0.0f32).angle(Vector2::new(0.0f32, 1.0f32)), &-Rad(f32::consts::FRAC_PI_2)); + + assert_ulps_eq!(Vector3::new(1.0f32, 0.0f32, 1.0f32).angle(Vector3::new(1.0f32, 1.0f32, 0.0f32)), &Rad(f32::consts::FRAC_PI_3)); + assert_ulps_eq!(Vector3::new(10.0f32, 0.0f32, 10.0f32).angle(Vector3::new(5.0f32, 5.0f32, 0.0f32)), &Rad(f32::consts::FRAC_PI_3)); + assert_ulps_eq!(Vector3::new(-1.0f32, 0.0f32, -1.0f32).angle(Vector3::new(1.0f32, -1.0f32, 0.0f32)), &Rad(2.0f32 * f32::consts::FRAC_PI_3)); + + assert_ulps_eq!(Vector4::new(1.0f32, 0.0f32, 1.0f32, 0.0f32).angle(Vector4::new(0.0f32, 1.0f32, 0.0f32, 1.0f32)), &Rad(f32::consts::FRAC_PI_2)); + assert_ulps_eq!(Vector4::new(10.0f32, 0.0f32, 10.0f32, 0.0f32).angle(Vector4::new(0.0f32, 5.0f32, 0.0f32, 5.0f32)), &Rad(f32::consts::FRAC_PI_2)); + assert_ulps_eq!(Vector4::new(-1.0f32, 0.0f32, -1.0f32, 0.0f32).angle(Vector4::new(0.0f32, 1.0f32, 0.0f32, 1.0f32)), &Rad(f32::consts::FRAC_PI_2)); +} + +#[test] +fn test_normalize() { + // TODO: test normalize_to, normalize_sel.0f32, and normalize_self_to + assert_ulps_eq!(Vector2::new(3.0f32, 4.0f32).normalize(), &Vector2::new(3.0f32/5.0f32, 4.0f32/5.0f32)); + assert_ulps_eq!(Vector3::new(2.0f32, 3.0f32, 6.0f32).normalize(), &Vector3::new(2.0f32/7.0f32, 3.0f32/7.0f32, 6.0f32/7.0f32)); + assert_ulps_eq!(Vector4::new(1.0f32, 2.0f32, 4.0f32, 10.0f32).normalize(), &Vector4::new(1.0f32/11.0f32, 2.0f32/11.0f32, 4.0f32/11.0f32, 10.0f32/11.0f32)); +} + +#[test] +fn test_cast() { + assert_ulps_eq!(Vector2::new(0.9f32, 1.5).cast(), Vector2::new(0.9f32, 1.5)); + assert_ulps_eq!(Vector3::new(1.0f32, 2.4, -3.13).cast(), Vector3::new(1.0f32, 2.4, -3.13)); + assert_ulps_eq!(Vector4::new(13.5f32, -4.6, -8.3, 2.41).cast(), Vector4::new(13.5f32, -4.6, -8.3, 2.41)); +} From d45536f1bd75dff6978e80af28eaa2091ae764f9 Mon Sep 17 00:00:00 2001 From: Luxko Date: Sat, 25 Feb 2017 07:42:15 +0800 Subject: [PATCH 2/6] Clean up a bit --- src/matrix.rs | 124 ++------------------------------------------------ src/vector.rs | 1 + 2 files changed, 5 insertions(+), 120 deletions(-) diff --git a/src/matrix.rs b/src/matrix.rs index ccd3b81..dc9f856 100644 --- a/src/matrix.rs +++ b/src/matrix.rs @@ -615,7 +615,7 @@ impl Matrix for Matrix4 { } } -//#[cfg(not(feature = "use_simd"))] + impl SquareMatrix for Matrix4 { type ColumnRow = Vector4; @@ -673,7 +673,7 @@ impl SquareMatrix for Matrix4 { } fn invert(&self) -> Option> { - let det: S = self.determinant(); + let det = self.determinant(); if ulps_eq!(det, &S::zero()) { None } else { let inv_det = S::one() / det; let t = self.transpose(); @@ -732,123 +732,6 @@ impl SquareMatrix for Matrix4 { ulps_eq!(self[3][2], &self[2][3]) } } -// #[cfg(feature = "use_simd")] -// impl SquareMatrix for Matrix4 { -// type ColumnRow = Vector4; - -// #[inline] -// default fn from_value(value: S) -> Matrix4 { -// Matrix4::new(value, S::zero(), S::zero(), S::zero(), -// S::zero(), value, S::zero(), S::zero(), -// S::zero(), S::zero(), value, S::zero(), -// S::zero(), S::zero(), S::zero(), value) -// } - -// #[inline] -// default fn from_diagonal(value: Vector4) -> Matrix4 { -// Matrix4::new(value.x, S::zero(), S::zero(), S::zero(), -// S::zero(), value.y, S::zero(), S::zero(), -// S::zero(), S::zero(), value.z, S::zero(), -// S::zero(), S::zero(), S::zero(), value.w) -// } - -// default fn transpose_self(&mut self) { -// self.swap_elements((0, 1), (1, 0)); -// self.swap_elements((0, 2), (2, 0)); -// self.swap_elements((0, 3), (3, 0)); -// self.swap_elements((1, 2), (2, 1)); -// self.swap_elements((1, 3), (3, 1)); -// self.swap_elements((2, 3), (3, 2)); -// } - -// default fn determinant(&self) -> S { -// let m0 = Matrix3::new(self[1][1], self[2][1], self[3][1], -// self[1][2], self[2][2], self[3][2], -// self[1][3], self[2][3], self[3][3]); -// let m1 = Matrix3::new(self[0][1], self[2][1], self[3][1], -// self[0][2], self[2][2], self[3][2], -// self[0][3], self[2][3], self[3][3]); -// let m2 = Matrix3::new(self[0][1], self[1][1], self[3][1], -// self[0][2], self[1][2], self[3][2], -// self[0][3], self[1][3], self[3][3]); -// let m3 = Matrix3::new(self[0][1], self[1][1], self[2][1], -// self[0][2], self[1][2], self[2][2], -// self[0][3], self[1][3], self[2][3]); - -// self[0][0] * m0.determinant() - -// self[1][0] * m1.determinant() + -// self[2][0] * m2.determinant() - -// self[3][0] * m3.determinant() -// } - -// #[inline] -// default fn diagonal(&self) -> Vector4 { -// Vector4::new(self[0][0], -// self[1][1], -// self[2][2], -// self[3][3]) -// } - -// default fn invert(&self) -> Option> { -// let det = self.determinant(); -// if ulps_eq!(det, &S::zero()) { None } else { -// let inv_det = S::one() / det; -// let t = self.transpose(); -// let cf = |i, j| { -// let mat = match i { -// 0 => Matrix3::from_cols(t.y.truncate_n(j), t.z.truncate_n(j), t.w.truncate_n(j)), -// 1 => Matrix3::from_cols(t.x.truncate_n(j), t.z.truncate_n(j), t.w.truncate_n(j)), -// 2 => Matrix3::from_cols(t.x.truncate_n(j), t.y.truncate_n(j), t.w.truncate_n(j)), -// 3 => Matrix3::from_cols(t.x.truncate_n(j), t.y.truncate_n(j), t.z.truncate_n(j)), -// _ => panic!("out of range"), -// }; -// let sign = if (i + j) & 1 == 1 { -S::one() } else { S::one() }; -// mat.determinant() * sign * inv_det -// }; - -// Some(Matrix4::new(cf(0, 0), cf(0, 1), cf(0, 2), cf(0, 3), -// cf(1, 0), cf(1, 1), cf(1, 2), cf(1, 3), -// cf(2, 0), cf(2, 1), cf(2, 2), cf(2, 3), -// cf(3, 0), cf(3, 1), cf(3, 2), cf(3, 3))) -// } -// } - -// default fn is_diagonal(&self) -> bool { -// ulps_eq!(self[0][1], &S::zero()) && -// ulps_eq!(self[0][2], &S::zero()) && -// ulps_eq!(self[0][3], &S::zero()) && - -// ulps_eq!(self[1][0], &S::zero()) && -// ulps_eq!(self[1][2], &S::zero()) && -// ulps_eq!(self[1][3], &S::zero()) && - -// ulps_eq!(self[2][0], &S::zero()) && -// ulps_eq!(self[2][1], &S::zero()) && -// ulps_eq!(self[2][3], &S::zero()) && - -// ulps_eq!(self[3][0], &S::zero()) && -// ulps_eq!(self[3][1], &S::zero()) && -// ulps_eq!(self[3][2], &S::zero()) -// } - -// default fn is_symmetric(&self) -> bool { -// ulps_eq!(self[0][1], &self[1][0]) && -// ulps_eq!(self[0][2], &self[2][0]) && -// ulps_eq!(self[0][3], &self[3][0]) && - -// ulps_eq!(self[1][0], &self[0][1]) && -// ulps_eq!(self[1][2], &self[2][1]) && -// ulps_eq!(self[1][3], &self[3][1]) && - -// ulps_eq!(self[2][0], &self[0][2]) && -// ulps_eq!(self[2][1], &self[1][2]) && -// ulps_eq!(self[2][3], &self[3][2]) && - -// ulps_eq!(self[3][0], &self[0][3]) && -// ulps_eq!(self[3][1], &self[1][3]) && -// ulps_eq!(self[3][2], &self[2][3]) -// } -// } impl ApproxEq for Matrix2 { type Epsilon = S::Epsilon; @@ -1452,7 +1335,8 @@ impl Rand for Matrix4 { } } -// Sadly buggy. +// Specialization strangely won't work for this. +// TODO: find another way // #[cfg(feature = "use_simd")] // impl SquareMatrix for Matrix4 { // fn determinant(&self) -> f32 { diff --git a/src/vector.rs b/src/vector.rs index 8e37c83..f3828a6 100644 --- a/src/vector.rs +++ b/src/vector.rs @@ -299,6 +299,7 @@ macro_rules! impl_vector { } // Utility macro for generating associated functions for the vectors +// mainly duplication #[cfg(feature = "use_simd")] macro_rules! impl_vector_default { ($VectorN:ident { $($field:ident),+ }, $n:expr, $constructor:ident) => { From 68d1d272221e0f8eb0203b7021d45b0e3f4acf57 Mon Sep 17 00:00:00 2001 From: Luxko Date: Sat, 25 Feb 2017 20:31:38 +0800 Subject: [PATCH 3/6] Add SIMD support for determinant and inverse Also: - Change the travis script: no use `cargo bench`ing in beta or stable. - Add bench test for Matrix4's determinant(). - clean code a bit --- .travis.yml | 4 +- benches/mat.rs | 2 + src/macros.rs | 59 +-------------------------- src/matrix.rs | 106 +++++++++++++++++++++++++------------------------ 4 files changed, 61 insertions(+), 110 deletions(-) diff --git a/.travis.yml b/.travis.yml index 38d6b8d..9768b66 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,6 @@ rust: - stable script: - - cargo build && cargo test && cargo bench + - cargo build && cargo test + - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo bench; + - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo build --features "use_simd" && cargo test --features "use_simd" && cargo bench --features "use_simd" diff --git a/benches/mat.rs b/benches/mat.rs index f472be2..9d94a72 100644 --- a/benches/mat.rs +++ b/benches/mat.rs @@ -59,3 +59,5 @@ bench_unop!(_bench_matrix4_invert, Matrix4, invert); bench_unop!(_bench_matrix2_transpose, Matrix2, transpose); bench_unop!(_bench_matrix3_transpose, Matrix3, transpose); bench_unop!(_bench_matrix4_transpose, Matrix4, transpose); + +bench_unop!(_bench_matrix4_determinant, Matrix4, determinant); \ No newline at end of file diff --git a/src/macros.rs b/src/macros.rs index ac741e8..e1fa8c6 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -381,21 +381,11 @@ macro_rules! impl_operator_simd { let $x: $Simd = self.into(); $body } } - - // #[cfg(feature = "simd")] - // impl<'a> $Op for &'a $Lhs { - // type Output = $Output; - // #[inline] - // fn $op(self) -> $Output { - // let $x: $Simd = (*self).into(); $body - // } - // } }; // When the right operand is a scalar (@rs [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty { fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } }) => { - impl $Op<$Rhs> for $Lhs { #[inline] fn $op(self, other: $Rhs) -> $Output { @@ -411,6 +401,7 @@ macro_rules! impl_operator_simd { } } }; + // When the right operand is a compound type ([$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty { fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } @@ -430,7 +421,6 @@ macro_rules! impl_operator_simd { let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), (*other).into()); $body } } - impl<'a> $Op<$Rhs> for &'a $Lhs { #[inline] @@ -439,7 +429,6 @@ macro_rules! impl_operator_simd { } } - impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs { #[inline] fn $op(self, other: &'a $Rhs) -> $Output { @@ -447,18 +436,17 @@ macro_rules! impl_operator_simd { } } }; + // When the left operand is a scalar (@ls [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ident { fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr } }) => { - impl $Op<$Rhs> for $Lhs { #[inline] fn $op(self, other: $Rhs) -> $Output { let ($lhs, $rhs): ($Simd, $Simd) = ($Simd::splat(self), other.into()); $body } } - impl<'a> $Op<&'a $Rhs> for $Lhs { #[inline] @@ -467,47 +455,4 @@ macro_rules! impl_operator_simd { } } }; - - // // When left is row-vec, right is colume-matrix - // (@vm [$Simd: ident]; $Op:ident<$Rhs:ty> for $Lhs:ty { - // fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { - - // } - // }) - - // When matrix with matrix - (@mm $Op:ident<$Rhs:ty> for $Lhs:ty { - fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body: expr } - }) => { - impl $Op<$Rhs> for $Lhs { - #[inline] - fn $op(self, other: $Rhs) -> $Output { - let ($lhs, $rhs) = (self, other); $body - } - } - - - impl<'a> $Op<&'a $Rhs> for $Lhs { - #[inline] - fn $op(self, other: &'a $Rhs) -> $Output { - let ($lhs, $rhs) = (self, other); $body - } - } - - - impl<'a> $Op<$Rhs> for &'a $Lhs { - #[inline] - fn $op(self, other: $Rhs) -> $Output { - let ($lhs, $rhs) = (self, other); $body - } - } - - - impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs { - #[inline] - fn $op(self, other: &'a $Rhs) -> $Output { - let ($lhs, $rhs) = (self, other); $body - } - } - } } diff --git a/src/matrix.rs b/src/matrix.rs index dc9f856..397d2ba 100644 --- a/src/matrix.rs +++ b/src/matrix.rs @@ -645,23 +645,10 @@ impl SquareMatrix for Matrix4 { } fn determinant(&self) -> S { - let m0 = Matrix3::new(self[1][1], self[2][1], self[3][1], - self[1][2], self[2][2], self[3][2], - self[1][3], self[2][3], self[3][3]); - let m1 = Matrix3::new(self[0][1], self[2][1], self[3][1], - self[0][2], self[2][2], self[3][2], - self[0][3], self[2][3], self[3][3]); - let m2 = Matrix3::new(self[0][1], self[1][1], self[3][1], - self[0][2], self[1][2], self[3][2], - self[0][3], self[1][3], self[3][3]); - let m3 = Matrix3::new(self[0][1], self[1][1], self[2][1], - self[0][2], self[1][2], self[2][2], - self[0][3], self[1][3], self[2][3]); - - self[0][0] * m0.determinant() - - self[1][0] * m1.determinant() + - self[2][0] * m2.determinant() - - self[3][0] * m3.determinant() + let tmp = unsafe { + det_sub_proc_unsafe(self, 1, 2, 3) + }; + tmp.dot(Vector4::new(self[0][0], self[1][0], self[2][0], self[3][0])) } #[inline] @@ -672,6 +659,12 @@ impl SquareMatrix for Matrix4 { self[3][3]) } + // The new implementation results in negative optimization when used + // without SIMD. so we opt them in with configuration. + // A better option would be using specialization. But currently somewhat + // specialization is too buggy, and it won't apply here. I'm getting + // weird error msgs. Help wanted. + #[cfg(not(feature = "use_simd"))] fn invert(&self) -> Option> { let det = self.determinant(); if ulps_eq!(det, &S::zero()) { None } else { @@ -695,6 +688,27 @@ impl SquareMatrix for Matrix4 { cf(3, 0), cf(3, 1), cf(3, 2), cf(3, 3))) } } + #[cfg(feature = "use_simd")] + fn invert(&self) -> Option> { + let tmp0 = unsafe { + det_sub_proc_unsafe(self, 1, 2, 3) + }; + let det = tmp0.dot(Vector4::new(self[0][0], self[1][0], self[2][0], self[3][0])); + if ulps_eq!(det, &S::zero()) { None } else { + let inv_det = S::one() / det; + let tmp0 = tmp0 * inv_det; + let tmp1 = unsafe { + det_sub_proc_unsafe(self, 0, 3, 2) * inv_det + }; + let tmp2 = unsafe { + det_sub_proc_unsafe(self, 0, 1, 3) * inv_det + }; + let tmp3 = unsafe { + det_sub_proc_unsafe(self, 0, 2, 1) * inv_det + }; + Some(Matrix4::from_cols(tmp0, tmp1, tmp2, tmp3)) + } + } fn is_diagonal(&self) -> bool { ulps_eq!(self[0][1], &S::zero()) && @@ -1036,6 +1050,7 @@ impl_operator!( Mul > for Matrix3 { // causes the LLVM to miss identical loads and multiplies. This optimization // causes the code to be auto vectorized properly increasing the performance // around ~4 times. +// Update: this should now be a bit more efficient impl_operator!( Mul > for Matrix4 { fn mul(lhs, rhs) -> Matrix4 { @@ -1335,39 +1350,26 @@ impl Rand for Matrix4 { } } -// Specialization strangely won't work for this. -// TODO: find another way -// #[cfg(feature = "use_simd")] -// impl SquareMatrix for Matrix4 { -// fn determinant(&self) -> f32 { -// let a = Simdf32x4::new(self.z[1], self.x[1], self.w[1], self.y[1]); -// let b = Simdf32x4::new(self.y[2], self.y[2], self.z[2], self.z[2]); -// let c = Simdf32x4::new(self.x[3], self.z[3], self.x[3], self.z[3]); -// let mut tmp = a * (b * c); -// let d = Simdf32x4::new(self.y[1], self.y[1], self.z[1], self.z[1]); -// let e = Simdf32x4::new(self.x[2], self.z[2], self.x[2], self.z[2]); -// let f = Simdf32x4::new(self.z[3], self.x[3], self.w[3], self.y[3]); -// let tmp1 = d * (e * f); -// tmp = tmp + tmp1; -// let g = Simdf32x4::new(self.x[1], self.z[1], self.x[1], self.z[1]); -// let h = Simdf32x4::new(self.z[2], self.x[2], self.w[2], self.y[2]); -// let i = Simdf32x4::new(self.y[3], self.y[3], self.z[3], self.z[3]); -// let tmp1 = g * (h * i); -// tmp = tmp + tmp1; -// let tmp1 = g * (b * f); -// tmp = tmp - tmp1; -// let tmp1 = d * (h * c); -// tmp = tmp - tmp1; -// let tmp1 = a * (e * i); -// tmp = tmp - tmp1; -// let tmp: Vector4 = (tmp * Simdf32x4::new(self.x[0], self.y[0], self.z[0], self.w[0])).into(); -// tmp.sum() -// } -// } +// Sub procedure for SIMD when dealing with determinant and inversion +#[inline] +unsafe fn det_sub_proc_unsafe(m: &Matrix4, x: usize, y: usize, z: usize) -> Vector4 { + let s: &[S; 16] = m.as_ref(); + let a = Vector4::new(*s.get_unchecked(4 + x), *s.get_unchecked(12 + x), *s.get_unchecked(x), *s.get_unchecked(8 + x)); + let b = Vector4::new(*s.get_unchecked(8 + y), *s.get_unchecked(8 + y), *s.get_unchecked(4 + y), *s.get_unchecked(4 + y)); + let c = Vector4::new(*s.get_unchecked(12 + z), *s.get_unchecked(z), *s.get_unchecked(12 + z), *s.get_unchecked(z)); -// #[cfg(feature = "use_simd")] -// impl_operator_simd!(@mm Mul> for Matrix4 { -// fn mul(matrix, vector) -> Vector4 { -// matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2] + matrix[3] * vector[3] -// } -// }); \ No newline at end of file + let d = Vector4::new(*s.get_unchecked(8 + x), *s.get_unchecked(8 + x), *s.get_unchecked(4 + x), *s.get_unchecked(4 + x)); + let e = Vector4::new(*s.get_unchecked(12 + y), *s.get_unchecked(y), *s.get_unchecked(12 + y), *s.get_unchecked(y)); + let f = Vector4::new(*s.get_unchecked(4 + z), *s.get_unchecked(12 + z), *s.get_unchecked(z), *s.get_unchecked(8 + z)); + + let g = Vector4::new(*s.get_unchecked(12 + x), *s.get_unchecked(x), *s.get_unchecked(12 + x), *s.get_unchecked(x)); + let h = Vector4::new(*s.get_unchecked(4 + y), *s.get_unchecked(12 + y), *s.get_unchecked(y), *s.get_unchecked(8 + y)); + let i = Vector4::new(*s.get_unchecked(8 + z), *s.get_unchecked(8 + z), *s.get_unchecked(4 + z), *s.get_unchecked(4 + z)); + let mut tmp = a.mul_element_wise(b.mul_element_wise(c)); + tmp += d.mul_element_wise(e.mul_element_wise(f)); + tmp += g.mul_element_wise(h.mul_element_wise(i)); + tmp -= a.mul_element_wise(e.mul_element_wise(i)); + tmp -= d.mul_element_wise(h.mul_element_wise(c)); + tmp -= g.mul_element_wise(b.mul_element_wise(f)); + tmp +} \ No newline at end of file From 17699b7ec35e0112a9923bc922e474226b18469a Mon Sep 17 00:00:00 2001 From: Luxko Date: Sat, 25 Feb 2017 20:47:43 +0800 Subject: [PATCH 4/6] fix build script --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9768b66..7539fd4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,5 @@ rust: script: - cargo build && cargo test - - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo bench; - - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo build --features "use_simd" && cargo test --features "use_simd" && cargo bench --features "use_simd" + - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo bench; fi + - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo build --features "use_simd" && cargo test --features "use_simd" && cargo bench --features "use_simd"; fi From 09c8727c7c22ff9a8ccb65e71b6719b4a7f6f0df Mon Sep 17 00:00:00 2001 From: Luxko Date: Sun, 12 Mar 2017 15:59:06 +0800 Subject: [PATCH 5/6] Add newline at end of file --- benches/mat.rs | 2 +- src/matrix.rs | 2 +- tests/{vectorf32.rs => vector4f32.rs} | 76 --------------------------- 3 files changed, 2 insertions(+), 78 deletions(-) rename tests/{vectorf32.rs => vector4f32.rs} (58%) diff --git a/benches/mat.rs b/benches/mat.rs index 9d94a72..88efab9 100644 --- a/benches/mat.rs +++ b/benches/mat.rs @@ -60,4 +60,4 @@ bench_unop!(_bench_matrix2_transpose, Matrix2, transpose); bench_unop!(_bench_matrix3_transpose, Matrix3, transpose); bench_unop!(_bench_matrix4_transpose, Matrix4, transpose); -bench_unop!(_bench_matrix4_determinant, Matrix4, determinant); \ No newline at end of file +bench_unop!(_bench_matrix4_determinant, Matrix4, determinant); diff --git a/src/matrix.rs b/src/matrix.rs index 397d2ba..e900c66 100644 --- a/src/matrix.rs +++ b/src/matrix.rs @@ -1372,4 +1372,4 @@ unsafe fn det_sub_proc_unsafe(m: &Matrix4, x: usize, y: usize, tmp -= d.mul_element_wise(h.mul_element_wise(c)); tmp -= g.mul_element_wise(b.mul_element_wise(f)); tmp -} \ No newline at end of file +} diff --git a/tests/vectorf32.rs b/tests/vector4f32.rs similarity index 58% rename from tests/vectorf32.rs rename to tests/vector4f32.rs index fd930e8..b860d76 100644 --- a/tests/vectorf32.rs +++ b/tests/vector4f32.rs @@ -23,15 +23,11 @@ use std::f32; #[test] fn test_constructor() { - assert_eq!(vec2(1f32, 2f32), Vector2::new(1f32, 2f32)); - assert_eq!(vec3(1f32, 2f32, 3f32), Vector3::new(1f32, 2f32, 3f32)); assert_eq!(vec4(1f32, 2f32, 3f32, 4f32), Vector4::new(1f32, 2f32, 3f32, 4f32)); } #[test] fn test_from_value() { - assert_eq!(Vector2::from_value(102f32), Vector2::new(102f32, 102f32)); - assert_eq!(Vector3::from_value(22f32), Vector3::new(22f32, 22f32, 22f32)); assert_eq!(Vector4::from_value(76.5f32), Vector4::new(76.5f32, 76.5f32, 76.5f32, 76.5f32)); } @@ -95,94 +91,58 @@ fn test_add() { #[test] fn test_sub() { impl_test_sub!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); - impl_test_sub!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); - impl_test_sub!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); } #[test] fn test_mul() { impl_test_mul!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); - impl_test_mul!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); - impl_test_mul!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); } #[test] fn test_div() { impl_test_div!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); - impl_test_div!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); - impl_test_div!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); } #[test] fn test_rem() { impl_test_rem!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32)); - impl_test_rem!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32)); - impl_test_rem!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32)); } #[test] fn test_dot() { - assert_eq!(Vector2::new(1.0f32, 2.0f32).dot(Vector2::new(3.0f32, 4.0f32)), 11.0f32); - assert_eq!(Vector3::new(1.0f32, 2.0f32, 3.0f32).dot(Vector3::new(4.0f32, 5.0f32, 6.0f32)), 32.0f32); assert_eq!(Vector4::new(1.0f32, 2.0f32, 3.0f32, 4.0f32).dot(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32)), 70.0f32); } #[test] fn test_sum() { - assert_eq!(Vector2::new(1f32, 2f32).sum(), 3f32); - assert_eq!(Vector3::new(1f32, 2f32, 3f32).sum(), 6f32); assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).sum(), 10f32); - assert_eq!(Vector2::new(3.0f32, 4.0f32).sum(), 7.0f32); - assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).sum(), 15.0f32); assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).sum(), 26.0f32); } #[test] fn test_product() { - assert_eq!(Vector2::new(1f32, 2f32).product(), 2f32); - assert_eq!(Vector3::new(1f32, 2f32, 3f32).product(), 6f32); assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).product(), 24f32); - assert_eq!(Vector2::new(3.0f32, 4.0f32).product(), 12.0f32); - assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).product(), 120.0f32); assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).product(), 1680.0f32); } #[test] fn test_min() { - assert_eq!(Vector2::new(1f32, 2f32).min(), 1f32); - assert_eq!(Vector3::new(1f32, 2f32, 3f32).min(), 1f32); assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).min(), 1f32); - assert_eq!(Vector2::new(3.0f32, 4.0f32).min(), 3.0f32); - assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).min(), 4.0f32); assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).min(), 5.0f32); } #[test] fn test_max() { - assert_eq!(Vector2::new(1f32, 2f32).max(), 2f32); - assert_eq!(Vector3::new(1f32, 2f32, 3f32).max(), 3f32); assert_eq!(Vector4::new(1f32, 2f32, 3f32, 4f32).max(), 4f32); - assert_eq!(Vector2::new(3.0f32, 4.0f32).max(), 4.0f32); - assert_eq!(Vector3::new(4.0f32, 5.0f32, 6.0f32).max(), 6.0f32); assert_eq!(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32).max(), 8.0f32); } -#[test] -fn test_cross() { - let a = Vector3::new(1f32, 2f32, 3f32); - let b = Vector3::new(4f32, 5f32, 6f32); - let r = Vector3::new(-3f32, 6f32, -3f32); - assert_eq!(a.cross(b), r); -} - #[test] fn test_is_perpendicular() { - assert!(Vector2::new(1.0f32, 0.0f32).is_perpendicular(Vector2::new(0.0f32, 1.0f32))); - assert!(Vector3::new(0.0f32, 1.0f32, 0.0f32).is_perpendicular(Vector3::new(0.0f32, 0.0f32, 1.0f32))); assert!(Vector4::new(1.0f32, 0.0f32, 0.0f32, 0.0f32).is_perpendicular(Vector4::new(0.0f32, 0.0f32, 0.0f32, 1.0f32))); } @@ -190,30 +150,6 @@ fn test_is_perpendicular() { mod test_magnitude { use cgmath::*; - #[test] - fn test_vector2(){ - let (a, a_res) = (Vector2::new(3.0f32, 4.0f32), 5.0f32); // (3, 4, 5) Pythagorean triple - let (b, b_res) = (Vector2::new(5.0f32, 12.0f32), 13.0f32); // (5, 12, 13) Pythagorean triple - - assert_eq!(a.magnitude2(), a_res * a_res); - assert_eq!(b.magnitude2(), b_res * b_res); - - assert_eq!(a.magnitude(), a_res); - assert_eq!(b.magnitude(), b_res); - } - - #[test] - fn test_vector3(){ - let (a, a_res) = (Vector3::new(2.0f32, 3.0f32, 6.0f32), 7.0f32); // (2, 3, 6, 7) Pythagorean quadruple - let (b, b_res) = (Vector3::new(1.0f32, 4.0f32, 8.0f32), 9.0f32); // (1, 4, 8, 9) Pythagorean quadruple - - assert_eq!(a.magnitude2(), a_res * a_res); - assert_eq!(b.magnitude2(), b_res * b_res); - - assert_eq!(a.magnitude(), a_res); - assert_eq!(b.magnitude(), b_res); - } - #[test] fn test_vector4(){ let (a, a_res) = (Vector4::new(1.0f32, 2.0f32, 4.0f32, 10.0f32), 11.0f32); // (1, 2, 4, 10, 11) Pythagorean quintuple @@ -238,14 +174,6 @@ mod test_magnitude { #[test] fn test_angle() { - assert_ulps_eq!(Vector2::new(1.0f32, 0.0f32).angle(Vector2::new(0.0f32, 1.0f32)), &Rad(f32::consts::FRAC_PI_2)); - assert_ulps_eq!(Vector2::new(10.0f32, 0.0f32).angle(Vector2::new(0.0f32, 5.0f32)), &Rad(f32::consts::FRAC_PI_2)); - assert_ulps_eq!(Vector2::new(-1.0f32, 0.0f32).angle(Vector2::new(0.0f32, 1.0f32)), &-Rad(f32::consts::FRAC_PI_2)); - - assert_ulps_eq!(Vector3::new(1.0f32, 0.0f32, 1.0f32).angle(Vector3::new(1.0f32, 1.0f32, 0.0f32)), &Rad(f32::consts::FRAC_PI_3)); - assert_ulps_eq!(Vector3::new(10.0f32, 0.0f32, 10.0f32).angle(Vector3::new(5.0f32, 5.0f32, 0.0f32)), &Rad(f32::consts::FRAC_PI_3)); - assert_ulps_eq!(Vector3::new(-1.0f32, 0.0f32, -1.0f32).angle(Vector3::new(1.0f32, -1.0f32, 0.0f32)), &Rad(2.0f32 * f32::consts::FRAC_PI_3)); - assert_ulps_eq!(Vector4::new(1.0f32, 0.0f32, 1.0f32, 0.0f32).angle(Vector4::new(0.0f32, 1.0f32, 0.0f32, 1.0f32)), &Rad(f32::consts::FRAC_PI_2)); assert_ulps_eq!(Vector4::new(10.0f32, 0.0f32, 10.0f32, 0.0f32).angle(Vector4::new(0.0f32, 5.0f32, 0.0f32, 5.0f32)), &Rad(f32::consts::FRAC_PI_2)); assert_ulps_eq!(Vector4::new(-1.0f32, 0.0f32, -1.0f32, 0.0f32).angle(Vector4::new(0.0f32, 1.0f32, 0.0f32, 1.0f32)), &Rad(f32::consts::FRAC_PI_2)); @@ -254,14 +182,10 @@ fn test_angle() { #[test] fn test_normalize() { // TODO: test normalize_to, normalize_sel.0f32, and normalize_self_to - assert_ulps_eq!(Vector2::new(3.0f32, 4.0f32).normalize(), &Vector2::new(3.0f32/5.0f32, 4.0f32/5.0f32)); - assert_ulps_eq!(Vector3::new(2.0f32, 3.0f32, 6.0f32).normalize(), &Vector3::new(2.0f32/7.0f32, 3.0f32/7.0f32, 6.0f32/7.0f32)); assert_ulps_eq!(Vector4::new(1.0f32, 2.0f32, 4.0f32, 10.0f32).normalize(), &Vector4::new(1.0f32/11.0f32, 2.0f32/11.0f32, 4.0f32/11.0f32, 10.0f32/11.0f32)); } #[test] fn test_cast() { - assert_ulps_eq!(Vector2::new(0.9f32, 1.5).cast(), Vector2::new(0.9f32, 1.5)); - assert_ulps_eq!(Vector3::new(1.0f32, 2.4, -3.13).cast(), Vector3::new(1.0f32, 2.4, -3.13)); assert_ulps_eq!(Vector4::new(13.5f32, -4.6, -8.3, 2.41).cast(), Vector4::new(13.5f32, -4.6, -8.3, 2.41)); } From 194c4770e968ed4c10fc326c6d9ec541b42a901a Mon Sep 17 00:00:00 2001 From: Luxko Date: Sun, 12 Mar 2017 20:44:58 +0800 Subject: [PATCH 6/6] add basic SIMD support for Quaternion --- src/quaternion.rs | 234 +++++++++++++++++++++++++++++++++++++++++++++- src/vector.rs | 18 ++-- 2 files changed, 240 insertions(+), 12 deletions(-) diff --git a/src/quaternion.rs b/src/quaternion.rs index c97aea6..384bf1e 100644 --- a/src/quaternion.rs +++ b/src/quaternion.rs @@ -30,6 +30,8 @@ use point::Point3; use rotation::{Rotation, Rotation3, Basis3}; use vector::Vector3; +#[cfg(feature = "use_simd")] +use simd::f32x4 as Simdf32x4; /// A [quaternion](https://en.wikipedia.org/wiki/Quaternion) in scalar/vector /// form. @@ -46,6 +48,30 @@ pub struct Quaternion { pub v: Vector3, } +#[cfg(feature = "use_simd")] +impl From for Quaternion { + #[inline] + fn from(f: Simdf32x4) -> Self { + unsafe { + let mut ret: Self = mem::uninitialized(); + { + let ret_mut: &mut [f32; 4] = ret.as_mut(); + f.store(ret_mut.as_mut(), 0 as usize); + } + ret + } + } +} + +#[cfg(feature = "use_simd")] +impl Into for Quaternion { + #[inline] + fn into(self) -> Simdf32x4 { + let self_ref: &[f32; 4] = self.as_ref(); + Simdf32x4::load(self_ref.as_ref(), 0 as usize) + } +} + impl Quaternion { /// Construct a new quaternion from one scalar component and three /// imaginary components @@ -73,7 +99,7 @@ impl Quaternion { let mag_avg = (src.magnitude2() * dst.magnitude2()).sqrt(); let dot = src.dot(dst); if ulps_eq!(dot, &mag_avg) { - Quaternion::one() + Quaternion::::one() } else if ulps_eq!(dot, &-mag_avg) { let axis = fallback.unwrap_or_else(|| { let mut v = Vector3::unit_x().cross(src); @@ -151,7 +177,7 @@ impl Zero for Quaternion { #[inline] fn is_zero(&self) -> bool { - ulps_eq!(self, &Quaternion::zero()) + ulps_eq!(self, &Quaternion::::zero()) } } @@ -175,6 +201,7 @@ impl MetricSpace for Quaternion { } } +#[cfg(not(feature = "use_simd"))] impl InnerSpace for Quaternion { #[inline] fn dot(self, other: Quaternion) -> S { @@ -182,6 +209,25 @@ impl InnerSpace for Quaternion { } } +#[cfg(feature = "use_simd")] +impl InnerSpace for Quaternion { + #[inline] + default fn dot(self, other: Quaternion) -> S { + self.s * other.s + self.v.dot(other.v) + } +} + +#[cfg(feature = "use_simd")] +impl InnerSpace for Quaternion { + #[inline] + fn dot(self, other: Quaternion) -> f32 { + let lhs: Simdf32x4 = self.into(); + let rhs: Simdf32x4 = other.into(); + let r = lhs * rhs; + r.extract(0) + r.extract(1) + r.extract(2) + r.extract(3) + } +} + impl From> for Quaternion<::Unitless> where A: Angle + Into::Unitless>>, { @@ -203,35 +249,119 @@ impl From> for Quaternion<::Unitless> where } } +#[cfg(not(feature = "use_simd"))] impl_operator!( Neg for Quaternion { fn neg(quat) -> Quaternion { Quaternion::from_sv(-quat.s, -quat.v) } }); +#[cfg(feature = "use_simd")] +impl_operator_default!( Neg for Quaternion { + fn neg(quat) -> Quaternion { + Quaternion::from_sv(-quat.s, -quat.v) + } +}); + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Neg for Quaternion { + fn neg(lhs) -> Quaternion { + (-lhs).into() + } + } +} + +#[cfg(not(feature = "use_simd"))] impl_operator!( Mul for Quaternion { fn mul(lhs, rhs) -> Quaternion { Quaternion::from_sv(lhs.s * rhs, lhs.v * rhs) } }); + +#[cfg(feature = "use_simd")] +impl_operator_default!( Mul for Quaternion { + fn mul(lhs, rhs) -> Quaternion { + Quaternion::from_sv(lhs.s * rhs, lhs.v * rhs) + } +}); + +#[cfg(feature = "use_simd")] +impl_operator_simd!{@rs + [Simdf32x4]; Mul for Quaternion { + fn mul(lhs, rhs) -> Quaternion { + (lhs * rhs).into() + } + } +} + +#[cfg(not(feature = "use_simd"))] impl_assignment_operator!( MulAssign for Quaternion { fn mul_assign(&mut self, scalar) { self.s *= scalar; self.v *= scalar; } }); +#[cfg(feature = "use_simd")] +impl_assignment_operator_default!( MulAssign for Quaternion { + fn mul_assign(&mut self, scalar) { self.s *= scalar; self.v *= scalar; } +}); + +#[cfg(feature = "use_simd")] +impl MulAssign for Quaternion { + fn mul_assign(&mut self, other: f32) { + let s: Simdf32x4 = (*self).into(); + let other = Simdf32x4::splat(other); + *self = (s * other).into(); + } +} + +#[cfg(not(feature = "use_simd"))] impl_operator!( Div for Quaternion { fn div(lhs, rhs) -> Quaternion { Quaternion::from_sv(lhs.s / rhs, lhs.v / rhs) } }); + +#[cfg(feature = "use_simd")] +impl_operator_default!( Div for Quaternion { + fn div(lhs, rhs) -> Quaternion { + Quaternion::from_sv(lhs.s / rhs, lhs.v / rhs) + } +}); + +#[cfg(feature = "use_simd")] +impl_operator_simd!{@rs + [Simdf32x4]; Div for Quaternion { + fn div(lhs, rhs) -> Quaternion { + (lhs / rhs).into() + } + } +} + +#[cfg(not(feature = "use_simd"))] impl_assignment_operator!( DivAssign for Quaternion { fn div_assign(&mut self, scalar) { self.s /= scalar; self.v /= scalar; } }); +#[cfg(feature = "use_simd")] +impl_assignment_operator_default!( DivAssign for Quaternion { + fn div_assign(&mut self, scalar) { self.s /= scalar; self.v /= scalar; } +}); + +#[cfg(feature = "use_simd")] +impl DivAssign for Quaternion { + fn div_assign(&mut self, other: f32) { + let s: Simdf32x4 = (*self).into(); + let other = Simdf32x4::splat(other); + *self = (s / other).into(); + } +} + impl_operator!( Rem for Quaternion { fn rem(lhs, rhs) -> Quaternion { Quaternion::from_sv(lhs.s % rhs, lhs.v % rhs) } }); + impl_assignment_operator!( RemAssign for Quaternion { fn rem_assign(&mut self, scalar) { self.s %= scalar; self.v %= scalar; } }); @@ -245,24 +375,93 @@ impl_operator!( Mul > for Quaternion { }} }); +#[cfg(not(feature = "use_simd"))] impl_operator!( Add > for Quaternion { fn add(lhs, rhs) -> Quaternion { Quaternion::from_sv(lhs.s + rhs.s, lhs.v + rhs.v) } }); + +#[cfg(feature = "use_simd")] +impl_operator_default!( Add > for Quaternion { + fn add(lhs, rhs) -> Quaternion { + Quaternion::from_sv(lhs.s + rhs.s, lhs.v + rhs.v) + } +}); + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Add> for Quaternion { + fn add(lhs, rhs) -> Quaternion { + (lhs + rhs).into() + } + } +} + +#[cfg(not(feature = "use_simd"))] impl_assignment_operator!( AddAssign > for Quaternion { fn add_assign(&mut self, other) { self.s += other.s; self.v += other.v; } }); +#[cfg(feature = "use_simd")] +impl_assignment_operator_default!( AddAssign > for Quaternion { + fn add_assign(&mut self, other) { self.s += other.s; self.v += other.v; } +}); + +#[cfg(feature = "use_simd")] +impl AddAssign for Quaternion { + #[inline] + fn add_assign(&mut self, rhs: Self) { + let s: Simdf32x4 = (*self).into(); + let rhs: Simdf32x4 = rhs.into(); + *self = (s + rhs).into(); + } +} + +#[cfg(not(feature = "use_simd"))] impl_operator!( Sub > for Quaternion { fn sub(lhs, rhs) -> Quaternion { Quaternion::from_sv(lhs.s - rhs.s, lhs.v - rhs.v) } }); + +#[cfg(feature = "use_simd")] +impl_operator_default!( Sub > for Quaternion { + fn sub(lhs, rhs) -> Quaternion { + Quaternion::from_sv(lhs.s - rhs.s, lhs.v - rhs.v) + } +}); + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Sub> for Quaternion { + fn sub(lhs, rhs) -> Quaternion { + (lhs - rhs).into() + } + } +} + +#[cfg(not(feature = "use_simd"))] impl_assignment_operator!( SubAssign > for Quaternion { fn sub_assign(&mut self, other) { self.s -= other.s; self.v -= other.v; } }); +#[cfg(feature = "use_simd")] +impl_assignment_operator_default!( SubAssign > for Quaternion { + fn sub_assign(&mut self, other) { self.s -= other.s; self.v -= other.v; } +}); + +#[cfg(feature = "use_simd")] +impl SubAssign for Quaternion { + #[inline] + fn sub_assign(&mut self, rhs: Self) { + let s: Simdf32x4 = (*self).into(); + let rhs: Simdf32x4 = rhs.into(); + *self = (s - rhs).into(); + } +} + +#[cfg(not(feature = "use_simd"))] impl_operator!( Mul > for Quaternion { fn mul(lhs, rhs) -> Quaternion { Quaternion::new(lhs.s * rhs.s - lhs.v.x * rhs.v.x - lhs.v.y * rhs.v.y - lhs.v.z * rhs.v.z, @@ -272,6 +471,37 @@ impl_operator!( Mul > for Quaternion { } }); +#[cfg(feature = "use_simd")] +impl_operator_default!( Mul > for Quaternion { + fn mul(lhs, rhs) -> Quaternion { + Quaternion::new(lhs.s * rhs.s - lhs.v.x * rhs.v.x - lhs.v.y * rhs.v.y - lhs.v.z * rhs.v.z, + lhs.s * rhs.v.x + lhs.v.x * rhs.s + lhs.v.y * rhs.v.z - lhs.v.z * rhs.v.y, + lhs.s * rhs.v.y + lhs.v.y * rhs.s + lhs.v.z * rhs.v.x - lhs.v.x * rhs.v.z, + lhs.s * rhs.v.z + lhs.v.z * rhs.s + lhs.v.x * rhs.v.y - lhs.v.y * rhs.v.x) + } +}); + +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Mul> for Quaternion { + fn mul(lhs, rhs) -> Quaternion { + { + let p0 = Simdf32x4::splat(lhs.extract(0)) * rhs; + let p1 = Simdf32x4::splat(lhs.extract(1)) * Simdf32x4::new( + -rhs.extract(1), rhs.extract(0), -rhs.extract(3), rhs.extract(2) + ); + let p2 = Simdf32x4::splat(lhs.extract(2)) * Simdf32x4::new( + -rhs.extract(2), rhs.extract(3), rhs.extract(0), -rhs.extract(1) + ); + let p3 = Simdf32x4::splat(lhs.extract(3)) * Simdf32x4::new( + -rhs.extract(3), -rhs.extract(2), rhs.extract(1), rhs.extract(0) + ); + (p0 + p1 + p2 + p3).into() + } + } + } +} + macro_rules! impl_scalar_mul { ($S:ident) => { impl_operator!(Mul> for $S { diff --git a/src/vector.rs b/src/vector.rs index f3828a6..a289614 100644 --- a/src/vector.rs +++ b/src/vector.rs @@ -802,6 +802,14 @@ impl Into for Vector4 { } } +#[cfg(feature = "use_simd")] +impl_operator_simd!{ + [Simdf32x4]; Add> for Vector4 { + fn add(lhs, rhs) -> Vector4 { + (lhs + rhs).into() + } + } +} #[cfg(feature = "use_simd")] impl_operator_simd!{ @@ -1050,7 +1058,6 @@ impl Into for Vector4 { } } - #[cfg(feature = "use_simd")] impl_operator_simd!{ [Simdu32x4]; Add> for Vector4 { @@ -1060,15 +1067,6 @@ impl_operator_simd!{ } } -#[cfg(feature = "use_simd")] -impl_operator_simd!{ - [Simdf32x4]; Add> for Vector4 { - fn add(lhs, rhs) -> Vector4 { - (lhs + rhs).into() - } - } -} - #[cfg(feature = "use_simd")] impl_operator_simd!{ [Simdu32x4]; Sub> for Vector4 {