diff --git a/.travis.yml b/.travis.yml
index 38d6b8d..9768b66 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,4 +6,6 @@ rust:
   - stable
 script:
-  - cargo build && cargo test && cargo bench
+  - cargo build && cargo test
+  - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo bench; fi
+  - if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then cargo build --features "use_simd" && cargo test --features "use_simd" && cargo bench --features "use_simd"; fi
 
diff --git a/benches/mat.rs b/benches/mat.rs
index f472be2..9d94a72 100644
--- a/benches/mat.rs
+++ b/benches/mat.rs
@@ -59,3 +59,5 @@ bench_unop!(_bench_matrix4_invert, Matrix4<f32>, invert);
 bench_unop!(_bench_matrix2_transpose, Matrix2<f32>, transpose);
 bench_unop!(_bench_matrix3_transpose, Matrix3<f32>, transpose);
 bench_unop!(_bench_matrix4_transpose, Matrix4<f32>, transpose);
+
+bench_unop!(_bench_matrix4_determinant, Matrix4<f32>, determinant);
\ No newline at end of file
diff --git a/src/macros.rs b/src/macros.rs
index ac741e8..e1fa8c6 100644
--- a/src/macros.rs
+++ b/src/macros.rs
@@ -381,21 +381,11 @@ macro_rules! impl_operator_simd {
                 let $x: $Simd = self.into(); $body
             }
         }
-
-        // #[cfg(feature = "simd")]
-        // impl<'a> $Op for &'a $Lhs {
-        //     type Output = $Output;
-        //     #[inline]
-        //     fn $op(self) -> $Output {
-        //         let $x: $Simd = (*self).into(); $body
-        //     }
-        // }
     };
     // When the right operand is a scalar
     (@rs [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty {
         fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
     }) => {
-
         impl $Op<$Rhs> for $Lhs {
             #[inline]
             fn $op(self, other: $Rhs) -> $Output {
@@ -411,6 +401,7 @@ macro_rules! impl_operator_simd {
             }
         }
     };
+
     // When the right operand is a compound type
     ([$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty {
         fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
@@ -430,7 +421,6 @@ macro_rules! impl_operator_simd {
                 let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), (*other).into()); $body
             }
         }
-
         impl<'a> $Op<$Rhs> for &'a $Lhs {
             #[inline]
             fn $op(self, other: $Rhs) -> $Output {
@@ -439,7 +429,6 @@ macro_rules! impl_operator_simd {
             }
         }
-
         impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs {
             #[inline]
             fn $op(self, other: &'a $Rhs) -> $Output {
@@ -447,18 +436,17 @@ macro_rules! impl_operator_simd {
             }
         }
     };
+
     // When the left operand is a scalar
     (@ls [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ident {
         fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
     }) => {
-
         impl $Op<$Rhs> for $Lhs {
             #[inline]
             fn $op(self, other: $Rhs) -> $Output {
                 let ($lhs, $rhs): ($Simd, $Simd) = ($Simd::splat(self), other.into()); $body
             }
         }
-
         impl<'a> $Op<&'a $Rhs> for $Lhs {
             #[inline]
             fn $op(self, other: &'a $Rhs) -> $Output {
@@ -467,47 +455,4 @@ macro_rules! impl_operator_simd {
             }
         }
     };
-
-    // // When left is row-vec, right is colume-matrix
-    // (@vm [$Simd: ident]; $Op:ident<$Rhs:ty> for $Lhs:ty {
-    //     fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty {
-
-    //     }
-    // })
-
-    // When matrix with matrix
-    (@mm $Op:ident<$Rhs:ty> for $Lhs:ty {
-        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body: expr }
-    }) => {
-        impl $Op<$Rhs> for $Lhs {
-            #[inline]
-            fn $op(self, other: $Rhs) -> $Output {
-                let ($lhs, $rhs) = (self, other); $body
-            }
-        }
-
-
-        impl<'a> $Op<&'a $Rhs> for $Lhs {
-            #[inline]
-            fn $op(self, other: &'a $Rhs) -> $Output {
-                let ($lhs, $rhs) = (self, other); $body
-            }
-        }
-
-
-        impl<'a> $Op<$Rhs> for &'a $Lhs {
-            #[inline]
-            fn $op(self, other: $Rhs) -> $Output {
-                let ($lhs, $rhs) = (self, other); $body
-            }
-        }
-
-
-        impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs {
-            #[inline]
-            fn $op(self, other: &'a $Rhs) -> $Output {
-                let ($lhs, $rhs) = (self, other); $body
-            }
-        }
-    }
 }
diff --git a/src/matrix.rs b/src/matrix.rs
index dc9f856..397d2ba 100644
--- a/src/matrix.rs
+++ b/src/matrix.rs
@@ -645,23 +645,10 @@ impl<S: BaseFloat> SquareMatrix for Matrix4<S> {
     }
 
     fn determinant(&self) -> S {
-        let m0 = Matrix3::new(self[1][1], self[2][1], self[3][1],
-                              self[1][2], self[2][2], self[3][2],
-                              self[1][3], self[2][3], self[3][3]);
-        let m1 = Matrix3::new(self[0][1], self[2][1], self[3][1],
-                              self[0][2], self[2][2], self[3][2],
-                              self[0][3], self[2][3], self[3][3]);
-        let m2 = Matrix3::new(self[0][1], self[1][1], self[3][1],
-                              self[0][2], self[1][2], self[3][2],
-                              self[0][3], self[1][3], self[3][3]);
-        let m3 = Matrix3::new(self[0][1], self[1][1], self[2][1],
-                              self[0][2], self[1][2], self[2][2],
-                              self[0][3], self[1][3], self[2][3]);
-
-        self[0][0] * m0.determinant() -
-            self[1][0] * m1.determinant() +
-            self[2][0] * m2.determinant() -
-            self[3][0] * m3.determinant()
+        let tmp = unsafe {
+            det_sub_proc_unsafe(self, 1, 2, 3)
+        };
+        tmp.dot(Vector4::new(self[0][0], self[1][0], self[2][0], self[3][0]))
     }
 
     #[inline]
@@ -672,6 +659,12 @@ impl<S: BaseFloat> SquareMatrix for Matrix4<S> {
                      self[3][3])
     }
 
+    // The new implementation results in negative optimization when used
+    // without SIMD. so we opt them in with configuration.
+    // A better option would be using specialization. But currently somewhat
+    // specialization is too buggy, and it won't apply here. I'm getting
+    // weird error msgs. Help wanted.
+    #[cfg(not(feature = "use_simd"))]
     fn invert(&self) -> Option<Matrix4<S>> {
         let det = self.determinant();
         if ulps_eq!(det, &S::zero()) { None } else {
@@ -695,6 +688,27 @@ impl<S: BaseFloat> SquareMatrix for Matrix4<S> {
                               cf(3, 0), cf(3, 1), cf(3, 2), cf(3, 3)))
         }
     }
+    #[cfg(feature = "use_simd")]
+    fn invert(&self) -> Option<Matrix4<S>> {
+        let tmp0 = unsafe {
+            det_sub_proc_unsafe(self, 1, 2, 3)
+        };
+        let det = tmp0.dot(Vector4::new(self[0][0], self[1][0], self[2][0], self[3][0]));
+        if ulps_eq!(det, &S::zero()) { None } else {
+            let inv_det = S::one() / det;
+            let tmp0 = tmp0 * inv_det;
+            let tmp1 = unsafe {
+                det_sub_proc_unsafe(self, 0, 3, 2) * inv_det
+            };
+            let tmp2 = unsafe {
+                det_sub_proc_unsafe(self, 0, 1, 3) * inv_det
+            };
+            let tmp3 = unsafe {
+                det_sub_proc_unsafe(self, 0, 2, 1) * inv_det
+            };
+            Some(Matrix4::from_cols(tmp0, tmp1, tmp2, tmp3))
+        }
+    }
 
     fn is_diagonal(&self) -> bool {
         ulps_eq!(self[0][1], &S::zero()) &&
@@ -1036,6 +1050,7 @@ impl_operator!(<S: BaseFloat> Mul<Matrix3<S> > for Matrix3<S> {
 // causes the LLVM to miss identical loads and multiplies. This optimization
 // causes the code to be auto vectorized properly increasing the performance
 // around ~4 times.
+// Update: this should now be a bit more efficient
 impl_operator!(<S: BaseFloat> Mul<Matrix4<S> > for Matrix4<S> {
     fn mul(lhs, rhs) -> Matrix4<S> {
         {
@@ -1335,39 +1350,26 @@ impl<S: BaseFloat + Rand> Rand for Matrix4<S> {
     }
 }
 
-// Specialization strangely won't work for this.
-// TODO: find another way
-// #[cfg(feature = "use_simd")]
-// impl SquareMatrix for Matrix4<f32> {
-//     fn determinant(&self) -> f32 {
-//         let a = Simdf32x4::new(self.z[1], self.x[1], self.w[1], self.y[1]);
-//         let b = Simdf32x4::new(self.y[2], self.y[2], self.z[2], self.z[2]);
-//         let c = Simdf32x4::new(self.x[3], self.z[3], self.x[3], self.z[3]);
-//         let mut tmp = a * (b * c);
-//         let d = Simdf32x4::new(self.y[1], self.y[1], self.z[1], self.z[1]);
-//         let e = Simdf32x4::new(self.x[2], self.z[2], self.x[2], self.z[2]);
-//         let f = Simdf32x4::new(self.z[3], self.x[3], self.w[3], self.y[3]);
-//         let tmp1 = d * (e * f);
-//         tmp = tmp + tmp1;
-//         let g = Simdf32x4::new(self.x[1], self.z[1], self.x[1], self.z[1]);
-//         let h = Simdf32x4::new(self.z[2], self.x[2], self.w[2], self.y[2]);
-//         let i = Simdf32x4::new(self.y[3], self.y[3], self.z[3], self.z[3]);
-//         let tmp1 = g * (h * i);
-//         tmp = tmp + tmp1;
-//         let tmp1 = g * (b * f);
-//         tmp = tmp - tmp1;
-//         let tmp1 = d * (h * c);
-//         tmp = tmp - tmp1;
-//         let tmp1 = a * (e * i);
-//         tmp = tmp - tmp1;
-//         let tmp: Vector4<f32> = (tmp * Simdf32x4::new(self.x[0], self.y[0], self.z[0], self.w[0])).into();
-//         tmp.sum()
-//     }
-// }
+// Sub procedure for SIMD when dealing with determinant and inversion
+#[inline]
+unsafe fn det_sub_proc_unsafe<S: BaseFloat>(m: &Matrix4<S>, x: usize, y: usize, z: usize) -> Vector4<S> {
+    let s: &[S; 16] = m.as_ref();
+    let a = Vector4::new(*s.get_unchecked(4 + x), *s.get_unchecked(12 + x), *s.get_unchecked(x), *s.get_unchecked(8 + x));
+    let b = Vector4::new(*s.get_unchecked(8 + y), *s.get_unchecked(8 + y), *s.get_unchecked(4 + y), *s.get_unchecked(4 + y));
+    let c = Vector4::new(*s.get_unchecked(12 + z), *s.get_unchecked(z), *s.get_unchecked(12 + z), *s.get_unchecked(z));
 
-// #[cfg(feature = "use_simd")]
-// impl_operator_simd!(@mm Mul<Vector4<f32>> for Matrix4<f32> {
-//     fn mul(matrix, vector) -> Vector4<f32> {
-//         matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2] + matrix[3] * vector[3]
-//     }
-// });
\ No newline at end of file
+    let d = Vector4::new(*s.get_unchecked(8 + x), *s.get_unchecked(8 + x), *s.get_unchecked(4 + x), *s.get_unchecked(4 + x));
+    let e = Vector4::new(*s.get_unchecked(12 + y), *s.get_unchecked(y), *s.get_unchecked(12 + y), *s.get_unchecked(y));
+    let f = Vector4::new(*s.get_unchecked(4 + z), *s.get_unchecked(12 + z), *s.get_unchecked(z), *s.get_unchecked(8 + z));
+
+    let g = Vector4::new(*s.get_unchecked(12 + x), *s.get_unchecked(x), *s.get_unchecked(12 + x), *s.get_unchecked(x));
+    let h = Vector4::new(*s.get_unchecked(4 + y), *s.get_unchecked(12 + y), *s.get_unchecked(y), *s.get_unchecked(8 + y));
+    let i = Vector4::new(*s.get_unchecked(8 + z), *s.get_unchecked(8 + z), *s.get_unchecked(4 + z), *s.get_unchecked(4 + z));
+    let mut tmp = a.mul_element_wise(b.mul_element_wise(c));
+    tmp += d.mul_element_wise(e.mul_element_wise(f));
+    tmp += g.mul_element_wise(h.mul_element_wise(i));
+    tmp -= a.mul_element_wise(e.mul_element_wise(i));
+    tmp -= d.mul_element_wise(h.mul_element_wise(c));
+    tmp -= g.mul_element_wise(b.mul_element_wise(f));
+    tmp
+}
\ No newline at end of file