From 149c781e81993889f59476411a6e8177cc12b5be Mon Sep 17 00:00:00 2001 From: Colin Sherratt Date: Wed, 27 Nov 2013 12:20:13 -0500 Subject: [PATCH] Documented the mat4 multiply optimization. --- src/cgmath/matrix.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/cgmath/matrix.rs b/src/cgmath/matrix.rs index 4e93534..23437b0 100644 --- a/src/cgmath/matrix.rs +++ b/src/cgmath/matrix.rs @@ -458,12 +458,16 @@ for Mat3 } } +// Using self.r(0).dot(other.c(0)) like the other matrix multiplies +// causes LLVM to miss identical loads and multiplies. This optimization +// lets the code be auto-vectorized properly, improving performance +// by roughly 4 times. macro_rules! dot_mat4( ($A:expr, $B:expr, $I:expr, $J:expr) => ( - *$A.cr(0, $I) * *$B.cr($J, 0) + - *$A.cr(1, $I) * *$B.cr($J, 1) + - *$A.cr(2, $I) * *$B.cr($J, 2) + - *$A.cr(3, $I) * *$B.cr($J, 3) + (*$A.cr(0, $I)) * (*$B.cr($J, 0)) + + (*$A.cr(1, $I)) * (*$B.cr($J, 1)) + + (*$A.cr(2, $I)) * (*$B.cr($J, 2)) + + (*$A.cr(3, $I)) * (*$B.cr($J, 3)) )) impl