Merge pull request #11 from csherratt/mat4_opt_only
Improved the performance of mat4 multiply.
This commit is contained in:
commit
b87aec64ac
1 changed files with 16 additions and 4 deletions
|
@ -458,15 +458,27 @@ for Mat3<S>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Using self.r(0).dot(other.c(0)) like the other matrix multiplies
|
||||||
|
// causes LLVM to miss identical loads and multiplies. This optimization
|
||||||
|
// causes the code to be auto-vectorized properly, increasing the performance
|
||||||
|
// by roughly 4 times.
|
||||||
|
// dot_mat4!(A, B, I, J) expands to a single expression: the sum, for k = 0..=3,
// of (*A.cr(k, I)) * (*B.cr(J, k)). Each `cr` result is dereferenced before
// being multiplied, and the four products are written out explicitly rather
// than going through a `dot` call.
// NOTE(review): presumably `cr(c, r)` is the (column, row) element accessor, so
// this is row I of A dotted with column J of B -- confirm against the trait.
macro_rules! dot_mat4(
|
||||||
|
($A:expr, $B:expr, $I:expr, $J:expr) => (
|
||||||
|
(*$A.cr(0, $I)) * (*$B.cr($J, 0)) +
|
||||||
|
(*$A.cr(1, $I)) * (*$B.cr($J, 1)) +
|
||||||
|
(*$A.cr(2, $I)) * (*$B.cr($J, 2)) +
|
||||||
|
(*$A.cr(3, $I)) * (*$B.cr($J, 3))
|
||||||
|
))
|
||||||
|
|
||||||
impl<S: Float>
|
impl<S: Float>
|
||||||
Matrix<S, [Vec4<S>, ..4], Vec4<S>, [S, ..4]>
|
Matrix<S, [Vec4<S>, ..4], Vec4<S>, [S, ..4]>
|
||||||
for Mat4<S>
|
for Mat4<S>
|
||||||
{
|
{
|
||||||
fn mul_m(&self, other: &Mat4<S>) -> Mat4<S> {
|
fn mul_m(&self, other: &Mat4<S>) -> Mat4<S> {
|
||||||
Mat4::new(self.r(0).dot(other.c(0)), self.r(1).dot(other.c(0)), self.r(2).dot(other.c(0)), self.r(3).dot(other.c(0)),
|
Mat4::new(dot_mat4!(self, other, 0, 0), dot_mat4!(self, other, 1, 0), dot_mat4!(self, other, 2, 0), dot_mat4!(self, other, 3, 0),
|
||||||
self.r(0).dot(other.c(1)), self.r(1).dot(other.c(1)), self.r(2).dot(other.c(1)), self.r(3).dot(other.c(1)),
|
dot_mat4!(self, other, 0, 1), dot_mat4!(self, other, 1, 1), dot_mat4!(self, other, 2, 1), dot_mat4!(self, other, 3, 1),
|
||||||
self.r(0).dot(other.c(2)), self.r(1).dot(other.c(2)), self.r(2).dot(other.c(2)), self.r(3).dot(other.c(2)),
|
dot_mat4!(self, other, 0, 2), dot_mat4!(self, other, 1, 2), dot_mat4!(self, other, 2, 2), dot_mat4!(self, other, 3, 2),
|
||||||
self.r(0).dot(other.c(3)), self.r(1).dot(other.c(3)), self.r(2).dot(other.c(3)), self.r(3).dot(other.c(3)))
|
dot_mat4!(self, other, 0, 3), dot_mat4!(self, other, 1, 3), dot_mat4!(self, other, 2, 3), dot_mat4!(self, other, 3, 3))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn transpose(&self) -> Mat4<S> {
|
fn transpose(&self) -> Mat4<S> {
|
||||||
|
|
Loading…
Reference in a new issue