From e845fd9b8c8a8150ad643ccd1982d696cc8fcf8b Mon Sep 17 00:00:00 2001 From: Brendan Zabarauskas Date: Wed, 30 Sep 2015 20:06:17 +1000 Subject: [PATCH] Reduce scope of dot_matrix4! macro --- src/matrix.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/matrix.rs b/src/matrix.rs index 475dfcc..6ebc659 100644 --- a/src/matrix.rs +++ b/src/matrix.rs @@ -622,18 +622,6 @@ impl Matrix> for Matrix3 { } } -// Using self.row(0).dot(other[0]) like the other matrix multiplies -// causes the LLVM to miss identical loads and multiplies. This optimization -// causes the code to be auto vectorized properly increasing the performance -// around ~4 times. -macro_rules! dot_matrix4( - ($A:expr, $B:expr, $I:expr, $J:expr) => ( - ($A[0][$I]) * ($B[$J][0]) + - ($A[1][$I]) * ($B[$J][1]) + - ($A[2][$I]) * ($B[$J][2]) + - ($A[3][$I]) * ($B[$J][3]) -)); - impl Matrix> for Matrix4 { #[inline] fn from_value(value: S) -> Matrix4 { @@ -965,6 +953,19 @@ impl<'a, 'b, S: BaseNum> Mul<&'a Matrix4> for &'b Matrix4 { type Output = Matrix4; fn mul(self, other: &'a Matrix4) -> Matrix4 { + // Using self.row(0).dot(other[0]) like the other matrix multiplies + // causes LLVM to miss identical loads and multiplies. This optimization + // lets the code be auto-vectorized properly, increasing performance by + // roughly 4 times. + macro_rules! dot_matrix4 { + ($A:expr, $B:expr, $I:expr, $J:expr) => { + ($A[0][$I]) * ($B[$J][0]) + + ($A[1][$I]) * ($B[$J][1]) + + ($A[2][$I]) * ($B[$J][2]) + + ($A[3][$I]) * ($B[$J][3]) + }; + }; + Matrix4::new(dot_matrix4!(self, other, 0, 0), dot_matrix4!(self, other, 1, 0), dot_matrix4!(self, other, 2, 0), dot_matrix4!(self, other, 3, 0), dot_matrix4!(self, other, 0, 1), dot_matrix4!(self, other, 1, 1), dot_matrix4!(self, other, 2, 1), dot_matrix4!(self, other, 3, 1), dot_matrix4!(self, other, 0, 2), dot_matrix4!(self, other, 1, 2), dot_matrix4!(self, other, 2, 2), dot_matrix4!(self, other, 3, 2),