[WIP] Add basic SIMD support

- Add opt-in SIMD support for the module. The feature requires the `simd` crate and specialization, so it can only be enabled on nightly. In the given benchmark, certain operations were up to 60% faster. Currently the supported types and operations are highly limited. - Clean up some deadly tests. Also add new tests for SIMD.
2017-02-25 07:26:11 +08:00 · 2017-02-25 07:26:11 +08:00 · 64924b954d
commit 64924b954d
parent 4e29dc1b2e
7 changed files with 1335 additions and 31 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -30,6 +30,7 @@ name = "cgmath"
 unstable = []
 default = ["rustc-serialize"]
 eders = ["serde", "serde_macros"]
 use_simd = ["simd"]
 [dependencies]
 approx = "0.1"
@ -38,6 +39,7 @@ rand = "0.3"
 rustc-serialize = { version = "0.3", optional = true }
 serde = { version = "0.8", optional = true }
 serde_macros = { version = "0.8", optional = true }
 simd = { version = "0.2", optional = true }
 [dev-dependencies]
 glium = "0.15"
--- a/src/lib.rs
+++ b/src/lib.rs
@ -49,9 +49,9 @@
 //! ```rust
 //! use cgmath::prelude::*;
 //! ```
 #![cfg_attr(feature = "eders", feature(plugin, custom_derive))]
 #![cfg_attr(feature = "eders", plugin(serde_macros))]
 #![cfg_attr(feature = "use_simd", feature(specialization))]
 #[macro_use]
 extern crate approx;
@ -64,6 +64,9 @@ extern crate rustc_serialize;
 #[cfg(feature = "eders")]
 extern crate serde;
 #[cfg(feature = "use_simd")]
 extern crate simd;
 // Re-exports
 pub use approx::*;
--- a/src/macros.rs
+++ b/src/macros.rs
@ -254,3 +254,260 @@ macro_rules! impl_index_operators {
        }
    }
 }
 // Generates operator trait impls (by-value and by-reference permutations)
 // in which every method is declared `default fn`. Each arm binds the operand
 // identifier(s) named by the caller to `self` / `other` and then evaluates
 // `$body` as the method's result.
 //
 // Arm order matters: an invocation is tried against the arms top to bottom,
 // so the `$Rhs:ident` (scalar) arm must stay ahead of the `$Rhs:ty` arm.
 #[cfg(feature = "use_simd")]
 macro_rules! impl_operator_default {
    // Unary operator: implemented for `$Lhs` and for `&'a $Lhs`.
    (<$S:ident: $Constraint:ident> $Op:ident for $Lhs:ty {
        fn $op:ident($x:ident) -> $Output:ty { $body:expr }
    }) => {
        impl<$S: $Constraint> $Op for $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self) -> $Output {
                let $x = self; $body
            }
        }
        impl<'a, $S: $Constraint> $Op for &'a $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self) -> $Output {
                let $x = self; $body
            }
        }
    };
    // Right operand is a scalar (a bare identifier such as `S`): the scalar is
    // always taken by value; the left operand may be `$Lhs` or `&'a $Lhs`.
    (<$S:ident: $Constraint:ident> $Op:ident<$Rhs:ident> for $Lhs:ty {
        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
    }) => {
        impl<$S: $Constraint> $Op<$Rhs> for $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        impl<'a, $S: $Constraint> $Op<$Rhs> for &'a $Lhs {
          type Output = $Output;
            #[inline]
            default fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
    };
    // Right operand is a compound type: all four value/reference combinations
    // of left and right operand are generated from the same `$body`.
    (<$S:ident: $Constraint:ident> $Op:ident<$Rhs:ty> for $Lhs:ty {
        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
    }) => {
        // value <op> value
        impl<$S: $Constraint> $Op<$Rhs> for $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        // value <op> reference
        impl<'a, $S: $Constraint> $Op<&'a $Rhs> for $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self, other: &'a $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        // reference <op> value
        impl<'a, $S: $Constraint> $Op<$Rhs> for &'a $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        // reference <op> reference
        impl<'a, 'b, $S: $Constraint> $Op<&'a $Rhs> for &'b $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self, other: &'a $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
    };
    // Left operand is a scalar: no generic parameters are introduced here, so
    // `$Lhs` and `$S` must be concrete types. The right operand `$Rhs<$S>` is
    // accepted by value and by reference.
    ($Op:ident<$Rhs:ident<$S:ident>> for $Lhs:ty {
        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
    }) => {
        impl $Op<$Rhs<$S>> for $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self, other: $Rhs<$S>) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        impl<'a> $Op<&'a $Rhs<$S>> for $Lhs {
           type Output = $Output;
            #[inline]
            default fn $op(self, other: &'a $Rhs<$S>) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
    };
 }
 // Generates a single compound-assignment operator impl (`$Op<$Rhs> for $Lhs`)
 // whose method is declared `default fn`.
 //
 // The caller writes the receiver as `&mut $lhs` (an identifier, normally
 // `self`) and names the right operand `$rhs`; `$body` is the block used
 // verbatim as the method body. Besides `$Constraint`, the scalar `$S` is
 // additionally required to implement `$Op<$S>` itself.
 #[cfg(feature = "use_simd")]
 macro_rules! impl_assignment_operator_default {
    (<$S:ident: $Constraint:ident> $Op:ident<$Rhs:ty> for $Lhs:ty {
        fn $op:ident(&mut $lhs:ident, $rhs:ident) $body:block
    }) => {
        impl<$S: $Constraint + $Op<$S>> $Op<$Rhs> for $Lhs {
            #[inline]
            default fn $op(&mut $lhs, $rhs: $Rhs) $body
        }
    };
 }
 /// Generates operator implementations for the permutations of by-ref and by-val operands,
 /// evaluating `$body` on operands that have been converted to the SIMD type `$Simd`.
 ///
 /// None of the generated impls declares `type Output`, and none of the methods is `default`.
 /// NOTE(review): presumably these are meant to specialize impls that already provide
 /// `Output` (e.g. ones produced by `impl_operator_default!`) -- confirm against the callers.
 #[cfg(feature = "use_simd")]
 macro_rules! impl_operator_simd {
    // Unary operator: `self` is converted with `.into()` before `$body` runs.
    // Only the by-value impl is generated; the by-reference one is disabled below.
    ([$Simd:ident]; $Op:ident for $Lhs:ty {
        fn $op:ident($x:ident) -> $Output:ty { $body:expr }
    }) => {
        impl $Op for $Lhs {
            #[inline]
            fn $op(self) -> $Output {
                let $x: $Simd = self.into(); $body
            }
        }
        // #[cfg(feature = "simd")]
        // impl<'a> $Op for &'a $Lhs {
        //     type Output = $Output;
        //     #[inline]
        //     fn $op(self) -> $Output {
        //         let $x: $Simd = (*self).into(); $body
        //     }
        // }
    };
    // Right operand is a scalar (`@rs`): the left operand is converted with
    // `.into()` and the scalar is turned into a `$Simd` via `$Simd::splat`.
    (@rs [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty {
        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
    }) => {
        impl $Op<$Rhs> for $Lhs {
            #[inline]
            fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), $Simd::splat(other)); $body
            }
        }
        impl<'a> $Op<$Rhs> for &'a $Lhs {
            #[inline]
            fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = ((*self).into(), $Simd::splat(other)); $body
            }
        }
    };
    // Right operand is a compound type: both operands are converted with
    // `.into()`; reference operands are dereferenced first. All four
    // value/reference combinations are generated.
    ([$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ty {
        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
    }) => {
        impl $Op<$Rhs> for $Lhs {
            #[inline]
            fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), other.into()); $body
            }
        }
        impl<'a> $Op<&'a $Rhs> for $Lhs {
            #[inline]
            fn $op(self, other: &'a $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = (self.into(), (*other).into()); $body
            }
        }
        impl<'a> $Op<$Rhs> for &'a $Lhs {
            #[inline]
            fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = ((*self).into(), other.into()); $body
            }
        }
        impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs {
            #[inline]
            fn $op(self, other: &'a $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = ((*self).into(), (*other).into()); $body
            }
        }
    };
    // Left operand is a scalar (`@ls`): the scalar `self` goes through
    // `$Simd::splat`, the right operand through `.into()`.
    (@ls [$Simd:ident]; $Op:ident<$Rhs:ty> for $Lhs:ident {
        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body:expr }
    }) => {
        impl $Op<$Rhs> for $Lhs {
            #[inline]
            fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = ($Simd::splat(self), other.into()); $body
            }
        }
        impl<'a> $Op<&'a $Rhs> for $Lhs {
            #[inline]
            fn $op(self, other: &'a $Rhs) -> $Output {
                let ($lhs, $rhs): ($Simd, $Simd) = ($Simd::splat(self), (*other).into()); $body
            }
        }
    };
    // // When left is row-vec, right is column-matrix
    // (@vm [$Simd: ident]; $Op:ident<$Rhs:ty> for $Lhs:ty {
    //     fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty {
    //     }
    // })
    // Matrix with matrix (`@mm`): no `$Simd` parameter and no conversion; the
    // operands are handed to `$body` exactly as received (value or reference).
    (@mm $Op:ident<$Rhs:ty> for $Lhs:ty {
        fn $op:ident($lhs:ident, $rhs:ident) -> $Output:ty { $body: expr }
    }) => {
        impl $Op<$Rhs> for $Lhs {
            #[inline]
            fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        impl<'a> $Op<&'a $Rhs> for $Lhs {
            #[inline]
            fn $op(self, other: &'a $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        impl<'a> $Op<$Rhs> for &'a $Lhs {
            #[inline]
            fn $op(self, other: $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
        impl<'a, 'b> $Op<&'a $Rhs> for &'b $Lhs {
            #[inline]
            fn $op(self, other: &'a $Rhs) -> $Output {
                let ($lhs, $rhs) = (self, other); $body
            }
        }
    }
 }
--- a/src/matrix.rs
+++ b/src/matrix.rs
@ -615,6 +615,7 @@ impl<S: BaseFloat> Matrix for Matrix4<S> {
    }
 }
 //#[cfg(not(feature = "use_simd"))]
 impl<S: BaseFloat> SquareMatrix for Matrix4<S> {
    type ColumnRow = Vector4<S>;
@ -672,7 +673,7 @@ impl<S: BaseFloat> SquareMatrix for Matrix4<S> {
    }
    fn invert(&self) -> Option<Matrix4<S>> {
-        let det = self.determinant();
+        let det: S = self.determinant();
        if ulps_eq!(det, &S::zero()) { None } else {
            let inv_det = S::one() / det;
            let t = self.transpose();
@ -731,6 +732,123 @@ impl<S: BaseFloat> SquareMatrix for Matrix4<S> {
        ulps_eq!(self[3][2], &self[2][3])
    }
 }
 // #[cfg(feature = "use_simd")]
 // impl<S: BaseFloat> SquareMatrix for Matrix4<S> {
 //     type ColumnRow = Vector4<S>;
 //     #[inline]
 //     default fn from_value(value: S) -> Matrix4<S> {
 //         Matrix4::new(value, S::zero(), S::zero(), S::zero(),
 //                      S::zero(), value, S::zero(), S::zero(),
 //                      S::zero(), S::zero(), value, S::zero(),
 //                      S::zero(), S::zero(), S::zero(), value)
 //     }
 //     #[inline]
 //     default fn from_diagonal(value: Vector4<S>) -> Matrix4<S> {
 //         Matrix4::new(value.x, S::zero(), S::zero(), S::zero(),
 //                      S::zero(), value.y, S::zero(), S::zero(),
 //                      S::zero(), S::zero(), value.z, S::zero(),
 //                      S::zero(), S::zero(), S::zero(), value.w)
 //     }
 //     default fn transpose_self(&mut self) {
 //         self.swap_elements((0, 1), (1, 0));
 //         self.swap_elements((0, 2), (2, 0));
 //         self.swap_elements((0, 3), (3, 0));
 //         self.swap_elements((1, 2), (2, 1));
 //         self.swap_elements((1, 3), (3, 1));
 //         self.swap_elements((2, 3), (3, 2));
 //     }
 //     default fn determinant(&self) -> S {
 //         let m0 = Matrix3::new(self[1][1], self[2][1], self[3][1],
 //                               self[1][2], self[2][2], self[3][2],
 //                               self[1][3], self[2][3], self[3][3]);
 //         let m1 = Matrix3::new(self[0][1], self[2][1], self[3][1],
 //                               self[0][2], self[2][2], self[3][2],
 //                               self[0][3], self[2][3], self[3][3]);
 //         let m2 = Matrix3::new(self[0][1], self[1][1], self[3][1],
 //                               self[0][2], self[1][2], self[3][2],
 //                               self[0][3], self[1][3], self[3][3]);
 //         let m3 = Matrix3::new(self[0][1], self[1][1], self[2][1],
 //                               self[0][2], self[1][2], self[2][2],
 //                               self[0][3], self[1][3], self[2][3]);
 //         self[0][0] * m0.determinant() -
 //         self[1][0] * m1.determinant() +
 //         self[2][0] * m2.determinant() -
 //         self[3][0] * m3.determinant()
 //     }
 //     #[inline]
 //     default fn diagonal(&self) -> Vector4<S> {
 //         Vector4::new(self[0][0],
 //                      self[1][1],
 //                      self[2][2],
 //                      self[3][3])
 //     }
 //     default fn invert(&self) -> Option<Matrix4<S>> {
 //         let det = self.determinant();
 //         if ulps_eq!(det, &S::zero()) { None } else {
 //             let inv_det = S::one() / det;
 //             let t = self.transpose();
 //             let cf = |i, j| {
 //                 let mat = match i {
 //                     0 => Matrix3::from_cols(t.y.truncate_n(j), t.z.truncate_n(j), t.w.truncate_n(j)),
 //                     1 => Matrix3::from_cols(t.x.truncate_n(j), t.z.truncate_n(j), t.w.truncate_n(j)),
 //                     2 => Matrix3::from_cols(t.x.truncate_n(j), t.y.truncate_n(j), t.w.truncate_n(j)),
 //                     3 => Matrix3::from_cols(t.x.truncate_n(j), t.y.truncate_n(j), t.z.truncate_n(j)),
 //                     _ => panic!("out of range"),
 //                 };
 //                 let sign = if (i + j) & 1 == 1 { -S::one() } else { S::one() };
 //                 mat.determinant() * sign * inv_det
 //             };
 //             Some(Matrix4::new(cf(0, 0), cf(0, 1), cf(0, 2), cf(0, 3),
 //                               cf(1, 0), cf(1, 1), cf(1, 2), cf(1, 3),
 //                               cf(2, 0), cf(2, 1), cf(2, 2), cf(2, 3),
 //                               cf(3, 0), cf(3, 1), cf(3, 2), cf(3, 3)))
 //         }
 //     }
 //     default fn is_diagonal(&self) -> bool {
 //         ulps_eq!(self[0][1], &S::zero()) &&
 //         ulps_eq!(self[0][2], &S::zero()) &&
 //         ulps_eq!(self[0][3], &S::zero()) &&
 //         ulps_eq!(self[1][0], &S::zero()) &&
 //         ulps_eq!(self[1][2], &S::zero()) &&
 //         ulps_eq!(self[1][3], &S::zero()) &&
 //         ulps_eq!(self[2][0], &S::zero()) &&
 //         ulps_eq!(self[2][1], &S::zero()) &&
 //         ulps_eq!(self[2][3], &S::zero()) &&
 //         ulps_eq!(self[3][0], &S::zero()) &&
 //         ulps_eq!(self[3][1], &S::zero()) &&
 //         ulps_eq!(self[3][2], &S::zero())
 //     }
 //     default fn is_symmetric(&self) -> bool {
 //         ulps_eq!(self[0][1], &self[1][0]) &&
 //         ulps_eq!(self[0][2], &self[2][0]) &&
 //         ulps_eq!(self[0][3], &self[3][0]) &&
 //         ulps_eq!(self[1][0], &self[0][1]) &&
 //         ulps_eq!(self[1][2], &self[2][1]) &&
 //         ulps_eq!(self[1][3], &self[3][1]) &&
 //         ulps_eq!(self[2][0], &self[0][2]) &&
 //         ulps_eq!(self[2][1], &self[1][2]) &&
 //         ulps_eq!(self[2][3], &self[3][2]) &&
 //         ulps_eq!(self[3][0], &self[0][3]) &&
 //         ulps_eq!(self[3][1], &self[1][3]) &&
 //         ulps_eq!(self[3][2], &self[2][3])
 //     }
 // }
 impl<S: BaseFloat> ApproxEq for Matrix2<S> {
    type Epsilon = S::Epsilon;
@ -955,10 +1073,6 @@ macro_rules! impl_matrix {
            fn sub_assign(&mut self, other: $MatrixN<S>) { $(self.$field -= other.$field);+ }
        }
        impl_operator!(<S: BaseFloat> Mul<$VectorN<S> > for $MatrixN<S> {
            fn mul(matrix, vector) -> $VectorN<S> { $VectorN::new($(matrix.row($row_index).dot(vector.clone())),+) }
        });
        impl_scalar_ops!($MatrixN<usize> { $($field),+ });
        impl_scalar_ops!($MatrixN<u8> { $($field),+ });
        impl_scalar_ops!($MatrixN<u16> { $($field),+ });
@ -1001,6 +1115,25 @@ impl_matrix!(Matrix2, Vector2 { x: 0, y: 1 });
 impl_matrix!(Matrix3, Vector3 { x: 0, y: 1, z: 2 });
 impl_matrix!(Matrix4, Vector4 { x: 0, y: 1, z: 2, w: 3 });
 // Generates `Mul<$VectorN<S>> for $MatrixN<S>` through `impl_operator!`.
 //
 // Every `field : row_index` pair contributes one component of the result:
 // the dot product of `matrix.row(row_index)` with the vector. Only
 // `$row_index` is used in the expansion; `$field` just drives the repetition.
 macro_rules! impl_mv_operator {
    ($MatrixN:ident, $VectorN:ident { $($field:ident : $row_index:expr),+ }) => {
        impl_operator!(<S: BaseFloat> Mul<$VectorN<S> > for $MatrixN<S> {
            // `vector.clone()` hands `dot` an owned vector on each repetition.
            fn mul(matrix, vector) -> $VectorN<S> {$VectorN::new($(matrix.row($row_index).dot(vector.clone())),+)}
        });
    }
 }
 impl_mv_operator!(Matrix2, Vector2 { x: 0, y: 1 });
 impl_mv_operator!(Matrix3, Vector3 { x: 0, y: 1, z: 2 });
 #[cfg(not(feature = "use_simd"))]
 impl_mv_operator!(Matrix4, Vector4 { x: 0, y: 1, z: 2, w: 3 });
 #[cfg(feature = "use_simd")]
 impl_operator!(<S: BaseFloat> Mul<Vector4<S> > for Matrix4<S> {
    // Matrix * vector written as a sum: each indexed entry of the matrix is
    // scaled by the vector component with the same index, and the four
    // products are added left to right.
    fn mul(matrix, vector) -> Vector4<S> {{
        let (m0, m1, m2, m3) = (matrix[0], matrix[1], matrix[2], matrix[3]);
        let (v0, v1, v2, v3) = (vector[0], vector[1], vector[2], vector[3]);
        m0 * v0 + m1 * v1 + m2 * v2 + m3 * v3
    }}
 });
 impl_operator!(<S: BaseFloat> Mul<Matrix2<S> > for Matrix2<S> {
    fn mul(lhs, rhs) -> Matrix2<S> {
        Matrix2::new(lhs.row(0).dot(rhs[0]), lhs.row(1).dot(rhs[0]),
@ -1020,21 +1153,21 @@ impl_operator!(<S: BaseFloat> Mul<Matrix3<S> > for Matrix3<S> {
 // causes the LLVM to miss identical loads and multiplies. This optimization
 // causes the code to be auto vectorized properly increasing the performance
 // around ~4 times.
 macro_rules! dot_matrix4 {
    // Expands to the fully unrolled four-term sum of products
    // `$lhs[k][$row] * $rhs[$col][k]` for k = 0, 1, 2, 3, added left to right.
    ($lhs:expr, $rhs:expr, $row:expr, $col:expr) => {
        ($lhs[0][$row]) * ($rhs[$col][0])
            + ($lhs[1][$row]) * ($rhs[$col][1])
            + ($lhs[2][$row]) * ($rhs[$col][2])
            + ($lhs[3][$row]) * ($rhs[$col][3])
    };
 }
 impl_operator!(<S: BaseFloat> Mul<Matrix4<S> > for Matrix4<S> {
    fn mul(lhs, rhs) -> Matrix4<S> {
-        Matrix4::new(dot_matrix4!(lhs, rhs, 0, 0), dot_matrix4!(lhs, rhs, 1, 0), dot_matrix4!(lhs, rhs, 2, 0), dot_matrix4!(lhs, rhs, 3, 0),
+        {
-                     dot_matrix4!(lhs, rhs, 0, 1), dot_matrix4!(lhs, rhs, 1, 1), dot_matrix4!(lhs, rhs, 2, 1), dot_matrix4!(lhs, rhs, 3, 1),
+            let a = lhs[0];
-                     dot_matrix4!(lhs, rhs, 0, 2), dot_matrix4!(lhs, rhs, 1, 2), dot_matrix4!(lhs, rhs, 2, 2), dot_matrix4!(lhs, rhs, 3, 2),
+            let b = lhs[1];
-                     dot_matrix4!(lhs, rhs, 0, 3), dot_matrix4!(lhs, rhs, 1, 3), dot_matrix4!(lhs, rhs, 2, 3), dot_matrix4!(lhs, rhs, 3, 3))
+            let c = lhs[2];
            let d = lhs[3];
            Matrix4::from_cols(
                a*rhs[0][0] + b*rhs[0][1] + c*rhs[0][2] + d*rhs[0][3],
                a*rhs[1][0] + b*rhs[1][1] + c*rhs[1][2] + d*rhs[1][3],
                a*rhs[2][0] + b*rhs[2][1] + c*rhs[2][2] + d*rhs[2][3],
                a*rhs[3][0] + b*rhs[3][1] + c*rhs[3][2] + d*rhs[3][3],
            )
        }
    }
 });
@ -1318,3 +1451,39 @@ impl<S: BaseFloat + Rand> Rand for Matrix4<S> {
        Matrix4{ x: rng.gen(), y: rng.gen(), z: rng.gen(), w: rng.gen() }
    }
 }
 // Sadly buggy.
 // #[cfg(feature = "use_simd")]
 // impl SquareMatrix for Matrix4<f32> {
 //     fn determinant(&self) -> f32 {
 //         let a = Simdf32x4::new(self.z[1], self.x[1], self.w[1], self.y[1]);
 //         let b = Simdf32x4::new(self.y[2], self.y[2], self.z[2], self.z[2]);
 //         let c = Simdf32x4::new(self.x[3], self.z[3], self.x[3], self.z[3]);
 //         let mut tmp = a * (b * c);
 //         let d = Simdf32x4::new(self.y[1], self.y[1], self.z[1], self.z[1]);
 //         let e = Simdf32x4::new(self.x[2], self.z[2], self.x[2], self.z[2]);
 //         let f = Simdf32x4::new(self.z[3], self.x[3], self.w[3], self.y[3]);
 //         let tmp1 = d * (e * f);
 //         tmp = tmp + tmp1;
 //         let g = Simdf32x4::new(self.x[1], self.z[1], self.x[1], self.z[1]);
 //         let h = Simdf32x4::new(self.z[2], self.x[2], self.w[2], self.y[2]);
 //         let i = Simdf32x4::new(self.y[3], self.y[3], self.z[3], self.z[3]);
 //         let tmp1 = g * (h * i);
 //         tmp = tmp + tmp1;
 //         let tmp1 = g * (b * f);
 //         tmp = tmp - tmp1;
 //         let tmp1 = d * (h * c);
 //         tmp = tmp - tmp1;
 //         let tmp1 = a * (e * i);
 //         tmp = tmp - tmp1;
 //         let tmp: Vector4<f32> = (tmp * Simdf32x4::new(self.x[0], self.y[0], self.z[0], self.w[0])).into();
 //         tmp.sum()
 //     }
 // }
 // #[cfg(feature = "use_simd")]
 // impl_operator_simd!(@mm Mul<Vector4<f32>> for Matrix4<f32> {
 //     fn mul(matrix, vector) -> Vector4<f32> {
 //         matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2] + matrix[3] * vector[3]
 //     }
 // });
--- a/src/vector.rs
+++ b/src/vector.rs
@ -25,6 +25,13 @@ use angle::Rad;
 use approx::ApproxEq;
 use num::{BaseNum, BaseFloat, PartialOrd};
 #[cfg(feature = "use_simd")]
 use simd::f32x4 as Simdf32x4;
 #[cfg(feature = "use_simd")]
 use simd::i32x4 as Simdi32x4;
 #[cfg(feature = "use_simd")]
 use simd::u32x4 as Simdu32x4;
 /// A 1-dimensional vector.
 ///
 /// This type is marked as `#[repr(C)]`.
@ -291,6 +298,217 @@ macro_rules! impl_vector {
    }
 }
 // Utility macro for generating associated functions for the vectors.
 //
 // Variant used when the `use_simd` feature is enabled. Takes the vector type
 // name with its field list, the component count `$n`, and the name of the
 // free-function constructor to generate. A subset of the generated operator
 // and `ElementWise` methods is declared `default`; the remainder (notably
 // everything `Rem`-related) is generated as ordinary, non-default items.
 #[cfg(feature = "use_simd")]
 macro_rules! impl_vector_default {
    ($VectorN:ident { $($field:ident),+ }, $n:expr, $constructor:ident) => {
        impl<S> $VectorN<S> {
            /// Construct a new vector, using the provided values.
            #[inline]
            pub fn new($($field: S),+) -> $VectorN<S> {
                $VectorN { $($field: $field),+ }
            }
        }
        /// The short constructor.
        #[inline]
        pub fn $constructor<S>($($field: S),+) -> $VectorN<S> {
            $VectorN::new($($field),+)
        }
        impl<S: NumCast + Copy> $VectorN<S> {
            /// Component-wise casting to another type
            #[inline]
            pub fn cast<T: NumCast>(&self) -> $VectorN<T> {
                // `unwrap` panics if `NumCast::from` returns `None` for any component.
                $VectorN { $($field: NumCast::from(self.$field).unwrap()),+ }
            }
        }
        impl<S: BaseFloat> MetricSpace for $VectorN<S> {
            type Metric = S;
            #[inline]
            fn distance2(self, other: Self) -> S {
                (other - self).magnitude2()
            }
        }
        // `Array`: construction from one repeated scalar plus folds over the fields.
        impl<S: Copy> Array for $VectorN<S> {
            type Element = S;
            #[inline]
            fn from_value(scalar: S) -> $VectorN<S> {
                $VectorN { $($field: scalar),+ }
            }
            #[inline]
            fn sum(self) -> S where S: Add<Output = S> {
                fold_array!(add, { $(self.$field),+ })
            }
            #[inline]
            fn product(self) -> S where S: Mul<Output = S> {
                fold_array!(mul, { $(self.$field),+ })
            }
            #[inline]
            fn min(self) -> S where S: PartialOrd {
                fold_array!(partial_min, { $(self.$field),+ })
            }
            #[inline]
            fn max(self) -> S where S: PartialOrd {
                fold_array!(partial_max, { $(self.$field),+ })
            }
        }
        impl<S: BaseNum> Zero for $VectorN<S> {
            #[inline]
            fn zero() -> $VectorN<S> {
                $VectorN::from_value(S::zero())
            }
            #[inline]
            fn is_zero(&self) -> bool {
                *self == $VectorN::zero()
            }
        }
        impl<S: BaseNum> VectorSpace for $VectorN<S> {
            type Scalar = S;
        }
        // Component-wise negation; `neg` is `default`.
        impl<S: Neg<Output = S>> Neg for $VectorN<S> {
            type Output = $VectorN<S>;
            #[inline]
            default fn neg(self) -> $VectorN<S> { $VectorN::new($(-self.$field),+) }
        }
        // Approximate equality: the defaults are forwarded from `S`, and two
        // vectors compare equal only if every pair of components does.
        impl<S: BaseFloat> ApproxEq for $VectorN<S> {
            type Epsilon = S::Epsilon;
            #[inline]
            fn default_epsilon() -> S::Epsilon {
                S::default_epsilon()
            }
            #[inline]
            fn default_max_relative() -> S::Epsilon {
                S::default_max_relative()
            }
            #[inline]
            fn default_max_ulps() -> u32 {
                S::default_max_ulps()
            }
            #[inline]
            fn relative_eq(&self, other: &Self, epsilon: S::Epsilon, max_relative: S::Epsilon) -> bool {
                $(S::relative_eq(&self.$field, &other.$field, epsilon, max_relative))&&+
            }
            #[inline]
            fn ulps_eq(&self, other: &Self, epsilon: S::Epsilon, max_ulps: u32) -> bool {
                $(S::ulps_eq(&self.$field, &other.$field, epsilon, max_ulps))&&+
            }
        }
        impl<S: BaseFloat + Rand> Rand for $VectorN<S> {
            #[inline]
            fn rand<R: Rng>(rng: &mut R) -> $VectorN<S> {
                $VectorN { $($field: rng.gen()),+ }
            }
        }
        // Vector/vector and vector/scalar arithmetic. Add, Sub, Mul and Div
        // (and their *Assign forms) go through the `*_default!` macros.
        impl_operator_default!(<S: BaseNum> Add<$VectorN<S> > for $VectorN<S> {
            fn add(lhs, rhs) -> $VectorN<S> { $VectorN::new($(lhs.$field + rhs.$field),+) }
        });
        impl_assignment_operator_default!(<S: BaseNum> AddAssign<$VectorN<S> > for $VectorN<S> {
            fn add_assign(&mut self, other) { $(self.$field += other.$field);+ }
        });
        impl_operator_default!(<S: BaseNum> Sub<$VectorN<S> > for $VectorN<S> {
            fn sub(lhs, rhs) -> $VectorN<S> { $VectorN::new($(lhs.$field - rhs.$field),+) }
        });
        impl_assignment_operator_default!(<S: BaseNum> SubAssign<$VectorN<S> > for $VectorN<S> {
            fn sub_assign(&mut self, other) { $(self.$field -= other.$field);+ }
        });
        impl_operator_default!(<S: BaseNum> Mul<S> for $VectorN<S> {
            fn mul(vector, scalar) -> $VectorN<S> { $VectorN::new($(vector.$field * scalar),+) }
        });
        impl_assignment_operator_default!(<S: BaseNum> MulAssign<S> for $VectorN<S> {
            fn mul_assign(&mut self, scalar) { $(self.$field *= scalar);+ }
        });
        impl_operator_default!(<S: BaseNum> Div<S> for $VectorN<S> {
            fn div(vector, scalar) -> $VectorN<S> { $VectorN::new($(vector.$field / scalar),+) }
        });
        impl_assignment_operator_default!(<S: BaseNum> DivAssign<S> for $VectorN<S> {
            fn div_assign(&mut self, scalar) { $(self.$field /= scalar);+ }
        });
        // Rem / RemAssign use the regular (non-`default`) macros.
        impl_operator!(<S: BaseNum> Rem<S> for $VectorN<S> {
            fn rem(vector, scalar) -> $VectorN<S> { $VectorN::new($(vector.$field % scalar),+) }
        });
        impl_assignment_operator!(<S: BaseNum> RemAssign<S> for $VectorN<S> {
            fn rem_assign(&mut self, scalar) { $(self.$field %= scalar);+ }
        });
        // Element-wise operations against another vector. Everything except
        // the two `rem_*` methods is `default`.
        impl<S: BaseNum> ElementWise for $VectorN<S> {
            #[inline] default fn add_element_wise(self, rhs: $VectorN<S>) -> $VectorN<S> { $VectorN::new($(self.$field + rhs.$field),+) }
            #[inline] default fn sub_element_wise(self, rhs: $VectorN<S>) -> $VectorN<S> { $VectorN::new($(self.$field - rhs.$field),+) }
            #[inline] default fn mul_element_wise(self, rhs: $VectorN<S>) -> $VectorN<S> { $VectorN::new($(self.$field * rhs.$field),+) }
            #[inline] default fn div_element_wise(self, rhs: $VectorN<S>) -> $VectorN<S> { $VectorN::new($(self.$field / rhs.$field),+) }
            #[inline] fn rem_element_wise(self, rhs: $VectorN<S>) -> $VectorN<S> { $VectorN::new($(self.$field % rhs.$field),+) }
            #[inline] default fn add_assign_element_wise(&mut self, rhs: $VectorN<S>) { $(self.$field += rhs.$field);+ }
            #[inline] default fn sub_assign_element_wise(&mut self, rhs: $VectorN<S>) { $(self.$field -= rhs.$field);+ }
            #[inline] default fn mul_assign_element_wise(&mut self, rhs: $VectorN<S>) { $(self.$field *= rhs.$field);+ }
            #[inline] default fn div_assign_element_wise(&mut self, rhs: $VectorN<S>) { $(self.$field /= rhs.$field);+ }
            #[inline] fn rem_assign_element_wise(&mut self, rhs: $VectorN<S>) { $(self.$field %= rhs.$field);+ }
        }
        // Element-wise operations against a scalar; same `default` split as above.
        impl<S: BaseNum> ElementWise<S> for $VectorN<S> {
            #[inline] default fn add_element_wise(self, rhs: S) -> $VectorN<S> { $VectorN::new($(self.$field + rhs),+) }
            #[inline] default fn sub_element_wise(self, rhs: S) -> $VectorN<S> { $VectorN::new($(self.$field - rhs),+) }
            #[inline] default fn mul_element_wise(self, rhs: S) -> $VectorN<S> { $VectorN::new($(self.$field * rhs),+) }
            #[inline] default fn div_element_wise(self, rhs: S) -> $VectorN<S> { $VectorN::new($(self.$field / rhs),+) }
            #[inline] fn rem_element_wise(self, rhs: S) -> $VectorN<S> { $VectorN::new($(self.$field % rhs),+) }
            #[inline] default fn add_assign_element_wise(&mut self, rhs: S) { $(self.$field += rhs);+ }
            #[inline] default fn sub_assign_element_wise(&mut self, rhs: S) { $(self.$field -= rhs);+ }
            #[inline] default fn mul_assign_element_wise(&mut self, rhs: S) { $(self.$field *= rhs);+ }
            #[inline] default fn div_assign_element_wise(&mut self, rhs: S) { $(self.$field /= rhs);+ }
            #[inline] fn rem_assign_element_wise(&mut self, rhs: S) { $(self.$field %= rhs);+ }
        }
        // Scalar-on-the-left operators for each concrete scalar type. Only
        // u32, i32 and f32 use the `default` flavour.
        // NOTE(review): presumably because those are the element types with a
        // SIMD counterpart imported in this file -- confirm.
        impl_scalar_ops!($VectorN<usize> { $($field),+ });
        impl_scalar_ops!($VectorN<u8> { $($field),+ });
        impl_scalar_ops!($VectorN<u16> { $($field),+ });
        impl_scalar_ops_default!($VectorN<u32> { $($field),+ });
        impl_scalar_ops!($VectorN<u64> { $($field),+ });
        impl_scalar_ops!($VectorN<isize> { $($field),+ });
        impl_scalar_ops!($VectorN<i8> { $($field),+ });
        impl_scalar_ops!($VectorN<i16> { $($field),+ });
        impl_scalar_ops_default!($VectorN<i32> { $($field),+ });
        impl_scalar_ops!($VectorN<i64> { $($field),+ });
        impl_scalar_ops_default!($VectorN<f32> { $($field),+ });
        impl_scalar_ops!($VectorN<f64> { $($field),+ });
        // Indexing by a single position and by the range types.
        impl_index_operators!($VectorN<S>, $n, S, usize);
        impl_index_operators!($VectorN<S>, $n, [S], Range<usize>);
        impl_index_operators!($VectorN<S>, $n, [S], RangeTo<usize>);
        impl_index_operators!($VectorN<S>, $n, [S], RangeFrom<usize>);
        impl_index_operators!($VectorN<S>, $n, [S], RangeFull);
    }
 }
 macro_rules! impl_scalar_ops {
    ($VectorN:ident<$S:ident> { $($field:ident),+ }) => {
        impl_operator!(Mul<$VectorN<$S>> for $S {
@ -305,10 +523,28 @@ macro_rules! impl_scalar_ops {
    };
 }
 // Generates the scalar-on-the-left operators `$S * v`, `$S / v` and `$S % v`
 // for the concrete vector type `$VectorN<$S>`, applying the operator between
 // the scalar and each listed field. The impls are produced through
 // `impl_operator_default!`, so their methods are `default`.
 #[cfg(feature = "use_simd")]
 macro_rules! impl_scalar_ops_default {
    ($VectorN:ident<$S:ident> { $($field:ident),+ }) => {
        impl_operator_default!(Mul<$VectorN<$S>> for $S {
            fn mul(scalar, vector) -> $VectorN<$S> { $VectorN::new($(scalar * vector.$field),+) }
        });
        impl_operator_default!(Div<$VectorN<$S>> for $S {
            fn div(scalar, vector) -> $VectorN<$S> { $VectorN::new($(scalar / vector.$field),+) }
        });
        impl_operator_default!(Rem<$VectorN<$S>> for $S {
            fn rem(scalar, vector) -> $VectorN<$S> { $VectorN::new($(scalar % vector.$field),+) }
        });
    };
 }
 impl_vector!(Vector1 { x }, 1, vec1);
 impl_vector!(Vector2 { x, y }, 2, vec2);
 impl_vector!(Vector3 { x, y, z }, 3, vec3);
 #[cfg(not(feature = "use_simd"))]
 impl_vector!(Vector4 { x, y, z, w }, 4, vec4);
 #[cfg(feature = "use_simd")]
 impl_vector_default!(Vector4 { x, y, z, w }, 4, vec4);
 impl_fixed_array_conversions!(Vector1<S> { x: 0 }, 1);
 impl_fixed_array_conversions!(Vector2<S> { x: 0, y: 1 }, 2);
@ -350,7 +586,7 @@ impl<S: BaseNum> Vector2<S> {
    /// Create a `Vector3`, using the `x` and `y` values from this vector, and the
    /// provided `z`.
    #[inline]
-    pub fn extend(self, z: S)-> Vector3<S> {
+    pub fn extend(self, z: S) -> Vector3<S> {
        Vector3::new(self.x, self.y, z)
    }
 }
@ -386,13 +622,13 @@ impl<S: BaseNum> Vector3<S> {
    /// Create a `Vector4`, using the `x`, `y` and `z` values from this vector, and the
    /// provided `w`.
    #[inline]
-    pub fn extend(self, w: S)-> Vector4<S> {
+    pub fn extend(self, w: S) -> Vector4<S> {
        Vector4::new(self.x, self.y, self.z, w)
    }
    /// Create a `Vector2`, dropping the `z` value.
    #[inline]
-    pub fn truncate(self)-> Vector2<S> {
+    pub fn truncate(self) -> Vector2<S> {
        Vector2::new(self.x, self.y)
    }
 }
@ -424,27 +660,27 @@ impl<S: BaseNum> Vector4<S> {
    /// Create a `Vector3`, dropping the `w` value.
    #[inline]
-    pub fn truncate(self)-> Vector3<S> {
+    pub fn truncate(self) -> Vector3<S> {
        Vector3::new(self.x, self.y, self.z)
    }
    /// Create a `Vector3`, dropping the nth element
    #[inline]
-    pub fn truncate_n(&self, n: isize)-> Vector3<S> {
+    pub fn truncate_n(&self, n: isize) -> Vector3<S> {
        match n {
            0 => Vector3::new(self.y, self.z, self.w),
            1 => Vector3::new(self.x, self.z, self.w),
            2 => Vector3::new(self.x, self.y, self.w),
            3 => Vector3::new(self.x, self.y, self.z),
-            _ => panic!("{:?} is out of range", n)
+            _ => panic!("{:?} is out of range", n),
        }
    }
 }
 /// Dot product of two vectors.
 #[inline]
-pub fn dot<V: InnerSpace>(a: V, b: V) -> V::Scalar where
+pub fn dot<V: InnerSpace>(a: V, b: V) -> V::Scalar
-    V::Scalar: BaseFloat,
+    where V::Scalar: BaseFloat
 {
    V::dot(a, b)
 }
@ -515,6 +751,371 @@ impl<S: fmt::Debug> fmt::Debug for Vector4<S> {
    }
 }
 #[cfg(feature = "use_simd")]
 impl From<Simdf32x4> for Vector4<f32> {
    /// Copy the four lanes of a SIMD register into a `Vector4<f32>`.
    ///
    /// The destination is zero-initialised first and then overwritten by the SIMD
    /// store. This avoids `mem::uninitialized()`, which is undefined behaviour as soon
    /// as a reference to the uninitialised value is formed.
    #[inline]
    fn from(f: Simdf32x4) -> Self {
        let mut ret = Vector4::new(0.0f32, 0.0f32, 0.0f32, 0.0f32);
        {
            // View the vector as a plain array so the lanes can be stored in one go.
            let ret_mut: &mut [f32; 4] = ret.as_mut();
            f.store(ret_mut.as_mut(), 0 as usize);
        }
        ret
    }
 }
 #[cfg(feature = "use_simd")]
 impl Vector4<f32> {
    /// Return a vector holding the square root of every component.
    #[inline]
    pub fn sqrt_element_wide(self) -> Self {
        let lanes: Simdf32x4 = self.into();
        Self::from(lanes.sqrt())
    }
    /// Return a vector holding the reciprocal square root of every component, as
    /// computed by the SIMD `approx_rsqrt` operation.
    #[inline]
    pub fn rsqrt_element_wide(self) -> Self {
        let lanes: Simdf32x4 = self.into();
        Self::from(lanes.approx_rsqrt())
    }
    /// Return a vector holding the reciprocal of every component, as computed by the
    /// SIMD `approx_reciprocal` operation.
    #[inline]
    pub fn recip_element_wide(self) -> Self {
        let lanes: Simdf32x4 = self.into();
        Self::from(lanes.approx_reciprocal())
    }
 }
 #[cfg(feature = "use_simd")]
 impl Into<Simdf32x4> for Vector4<f32> {
    /// Load the four components of the vector into a SIMD register.
    #[inline]
    fn into(self) -> Simdf32x4 {
        // Borrow the vector as a fixed-size array, then load from its start.
        let components: &[f32; 4] = self.as_ref();
        Simdf32x4::load(&components[..], 0)
    }
 }
 // SIMD operator impls for `Vector4<f32>`, generated by `impl_operator_simd!`.
 // NOTE(review): the macro definition is not visible here. Presumably it converts the
 // operands to the bracketed SIMD type before evaluating the body, and `@rs` marks a
 // scalar right-hand side — confirm against `impl_operator_simd!` in macros.rs.

 // Vector4<f32> - Vector4<f32>
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdf32x4]; Sub<Vector4<f32>> for Vector4<f32> {
        fn sub(lhs, rhs) -> Vector4<f32> {
            (lhs - rhs).into()
        }
    }
 }
 // Vector4<f32> * f32
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{@rs
    [Simdf32x4]; Mul<f32> for Vector4<f32> {
        fn mul(lhs, rhs) -> Vector4<f32> {
            (lhs * rhs).into()
        }
    }
 }
 // Vector4<f32> / f32
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{@rs
    [Simdf32x4]; Div<f32> for Vector4<f32> {
        fn div(lhs, rhs) -> Vector4<f32> {
            (lhs / rhs).into()
        }
    }
 }
 // -Vector4<f32>
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdf32x4]; Neg for Vector4<f32> {
        fn neg(lhs) -> Vector4<f32> {
            (-lhs).into()
        }
    }
 }
 #[cfg(feature = "use_simd")]
 impl AddAssign for Vector4<f32> {
    /// Add `rhs` to `self` component by component using one SIMD addition.
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        let lhs_lanes: Simdf32x4 = (*self).into();
        let rhs_lanes: Simdf32x4 = rhs.into();
        *self = Self::from(lhs_lanes + rhs_lanes);
    }
 }
 #[cfg(feature = "use_simd")]
 impl SubAssign for Vector4<f32> {
    /// Subtract `rhs` from `self` component by component using one SIMD subtraction.
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        let lhs_lanes: Simdf32x4 = (*self).into();
        let rhs_lanes: Simdf32x4 = rhs.into();
        *self = Self::from(lhs_lanes - rhs_lanes);
    }
 }
 #[cfg(feature = "use_simd")]
 impl MulAssign<f32> for Vector4<f32> {
    /// Multiply every component of `self` by the scalar `other`.
    ///
    /// The scalar is broadcast to all four lanes so the product is one SIMD multiply.
    // `#[inline]` matches the sibling `AddAssign`/`SubAssign` impls; this impl is not
    // generic, so without the hint it cannot be inlined into downstream crates.
    #[inline]
    fn mul_assign(&mut self, other: f32) {
        let s: Simdf32x4 = (*self).into();
        let other = Simdf32x4::splat(other);
        *self = (s * other).into();
    }
 }
 #[cfg(feature = "use_simd")]
 impl DivAssign<f32> for Vector4<f32> {
    /// Divide every component of `self` by the scalar `other`.
    ///
    /// The scalar is broadcast to all four lanes so the quotient is one SIMD divide.
    // `#[inline]` matches the sibling `AddAssign`/`SubAssign` impls; this impl is not
    // generic, so without the hint it cannot be inlined into downstream crates.
    #[inline]
    fn div_assign(&mut self, other: f32) {
        let s: Simdf32x4 = (*self).into();
        let other = Simdf32x4::splat(other);
        *self = (s / other).into();
    }
 }
 /// SIMD implementation of the vector-by-vector element-wise operations.
 ///
 /// Addition and subtraction reuse the `+`/`-` operators; multiplication and division
 /// load both operands into `Simdf32x4` registers and operate on all four lanes at once.
 #[cfg(feature = "use_simd")]
 impl ElementWise for Vector4<f32> {
    /// Component-wise sum; identical to `self + rhs`.
    #[inline] fn add_element_wise(self, rhs: Vector4<f32>) -> Vector4<f32> { self + rhs }
    /// Component-wise difference; identical to `self - rhs`.
    #[inline] fn sub_element_wise(self, rhs: Vector4<f32>) -> Vector4<f32> { self - rhs }
    /// Component-wise product.
    #[inline] fn mul_element_wise(self, rhs: Vector4<f32>) -> Vector4<f32> {
        let s: Simdf32x4 = self.into();
        let rhs: Simdf32x4 = rhs.into();
        (s * rhs).into()
    }
    /// Component-wise quotient.
    #[inline] fn div_element_wise(self, rhs: Vector4<f32>) -> Vector4<f32> {
        let s: Simdf32x4 = self.into();
        let rhs: Simdf32x4 = rhs.into();
        (s / rhs).into()
    }
    /// In-place component-wise sum.
    #[inline] fn add_assign_element_wise(&mut self, rhs: Vector4<f32>) { (*self) += rhs; }
    /// In-place component-wise difference.
    #[inline] fn sub_assign_element_wise(&mut self, rhs: Vector4<f32>) { (*self) -= rhs; }
    /// In-place component-wise product.
    #[inline] fn mul_assign_element_wise(&mut self, rhs: Vector4<f32>) {
        let s: Simdf32x4 = (*self).into();
        let rhs: Simdf32x4 = rhs.into();
        *self = (s * rhs).into();
    }
    /// In-place component-wise quotient.
    #[inline] fn div_assign_element_wise(&mut self, rhs: Vector4<f32>) {
        let s: Simdf32x4 = (*self).into();
        let rhs: Simdf32x4 = rhs.into();
        // Must divide: this previously multiplied, so it disagreed with
        // `div_element_wise` above.
        *self = (s / rhs).into();
    }
 }
 /// SIMD implementation of the vector-by-scalar element-wise operations.
 ///
 /// Addition and subtraction broadcast the scalar with `Simdf32x4::splat`;
 /// multiplication and division reuse the vector-by-scalar `*` and `/` operators.
 #[cfg(feature = "use_simd")]
 impl ElementWise<f32> for Vector4<f32> {
    /// Add `rhs` to every component.
    #[inline]
    fn add_element_wise(self, rhs: f32) -> Vector4<f32> {
        let lanes: Simdf32x4 = self.into();
        let broadcast = Simdf32x4::splat(rhs);
        Vector4::from(lanes + broadcast)
    }
    /// Subtract `rhs` from every component.
    #[inline]
    fn sub_element_wise(self, rhs: f32) -> Vector4<f32> {
        let lanes: Simdf32x4 = self.into();
        let broadcast = Simdf32x4::splat(rhs);
        Vector4::from(lanes - broadcast)
    }
    /// Multiply every component by `rhs`.
    #[inline]
    fn mul_element_wise(self, rhs: f32) -> Vector4<f32> {
        self * rhs
    }
    /// Divide every component by `rhs`.
    #[inline]
    fn div_element_wise(self, rhs: f32) -> Vector4<f32> {
        self / rhs
    }
    /// Add `rhs` to every component in place.
    #[inline]
    fn add_assign_element_wise(&mut self, rhs: f32) {
        let lanes: Simdf32x4 = (*self).into();
        let broadcast = Simdf32x4::splat(rhs);
        *self = Vector4::from(lanes + broadcast);
    }
    /// Subtract `rhs` from every component in place.
    #[inline]
    fn sub_assign_element_wise(&mut self, rhs: f32) {
        let lanes: Simdf32x4 = (*self).into();
        let broadcast = Simdf32x4::splat(rhs);
        *self = Vector4::from(lanes - broadcast);
    }
    /// Multiply every component by `rhs` in place.
    #[inline]
    fn mul_assign_element_wise(&mut self, rhs: f32) {
        *self *= rhs;
    }
    /// Divide every component by `rhs` in place.
    #[inline]
    fn div_assign_element_wise(&mut self, rhs: f32) {
        *self /= rhs;
    }
 }
 #[cfg(feature = "use_simd")]
 impl From<Simdi32x4> for Vector4<i32> {
    /// Copy the four lanes of a SIMD register into a `Vector4<i32>`.
    ///
    /// The destination is zero-initialised first and then overwritten by the SIMD
    /// store. This avoids `mem::uninitialized()`, which is undefined behaviour as soon
    /// as a reference to the uninitialised value is formed.
    #[inline]
    fn from(f: Simdi32x4) -> Self {
        let mut ret = Vector4::new(0i32, 0i32, 0i32, 0i32);
        {
            // View the vector as a plain array so the lanes can be stored in one go.
            let ret_mut: &mut [i32; 4] = ret.as_mut();
            f.store(ret_mut.as_mut(), 0 as usize);
        }
        ret
    }
 }
 #[cfg(feature = "use_simd")]
 impl Into<Simdi32x4> for Vector4<i32> {
    /// Load the four components of the vector into a SIMD register.
    #[inline]
    fn into(self) -> Simdi32x4 {
        // Borrow the vector as a fixed-size array, then load from its start.
        let components: &[i32; 4] = self.as_ref();
        Simdi32x4::load(&components[..], 0)
    }
 }
 // SIMD operator impls for `Vector4<i32>`, generated by `impl_operator_simd!`.
 // NOTE(review): the macro definition is not visible here. Presumably it converts the
 // operands to the bracketed SIMD type before evaluating the body, and `@rs` marks a
 // scalar right-hand side — confirm against `impl_operator_simd!` in macros.rs.
 // No SIMD `Div` is provided for `Vector4<i32>` in this section.

 // Vector4<i32> + Vector4<i32>
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdi32x4]; Add<Vector4<i32>> for Vector4<i32> {
        fn add(lhs, rhs) -> Vector4<i32> {
            (lhs + rhs).into()
        }
    }
 }
 // Vector4<i32> - Vector4<i32>
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdi32x4]; Sub<Vector4<i32>> for Vector4<i32> {
        fn sub(lhs, rhs) -> Vector4<i32> {
            (lhs - rhs).into()
        }
    }
 }
 // Vector4<i32> * i32
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{@rs
    [Simdi32x4]; Mul<i32> for Vector4<i32> {
        fn mul(lhs, rhs) -> Vector4<i32> {
            (lhs * rhs).into()
        }
    }
 }
 // -Vector4<i32>
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdi32x4]; Neg for Vector4<i32> {
        fn neg(lhs) -> Vector4<i32> {
            (-lhs).into()
        }
    }
 }
 #[cfg(feature = "use_simd")]
 impl AddAssign for Vector4<i32> {
    /// Add `rhs` to `self` component by component using one SIMD addition.
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        let lhs_lanes: Simdi32x4 = (*self).into();
        let rhs_lanes: Simdi32x4 = rhs.into();
        *self = Self::from(lhs_lanes + rhs_lanes);
    }
 }
 #[cfg(feature = "use_simd")]
 impl SubAssign for Vector4<i32> {
    /// Subtract `rhs` from `self` component by component using one SIMD subtraction.
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        let lhs_lanes: Simdi32x4 = (*self).into();
        let rhs_lanes: Simdi32x4 = rhs.into();
        *self = Self::from(lhs_lanes - rhs_lanes);
    }
 }
 #[cfg(feature = "use_simd")]
 impl MulAssign<i32> for Vector4<i32> {
    /// Multiply every component of `self` by the scalar `other`.
    ///
    /// The scalar is broadcast to all four lanes so the product is one SIMD multiply.
    // `#[inline]` matches the sibling `AddAssign`/`SubAssign` impls; this impl is not
    // generic, so without the hint it cannot be inlined into downstream crates.
    #[inline]
    fn mul_assign(&mut self, other: i32) {
        let s: Simdi32x4 = (*self).into();
        let other = Simdi32x4::splat(other);
        *self = (s * other).into();
    }
 }
 #[cfg(feature = "use_simd")]
 impl From<Simdu32x4> for Vector4<u32> {
    /// Copy the four lanes of a SIMD register into a `Vector4<u32>`.
    ///
    /// The destination is zero-initialised first and then overwritten by the SIMD
    /// store. This avoids `mem::uninitialized()`, which is undefined behaviour as soon
    /// as a reference to the uninitialised value is formed.
    #[inline]
    fn from(f: Simdu32x4) -> Self {
        let mut ret = Vector4::new(0u32, 0u32, 0u32, 0u32);
        {
            // View the vector as a plain array so the lanes can be stored in one go.
            let ret_mut: &mut [u32; 4] = ret.as_mut();
            f.store(ret_mut.as_mut(), 0 as usize);
        }
        ret
    }
 }
 #[cfg(feature = "use_simd")]
 impl Into<Simdu32x4> for Vector4<u32> {
    /// Load the four components of the vector into a SIMD register.
    #[inline]
    fn into(self) -> Simdu32x4 {
        // Borrow the vector as a fixed-size array, then load from its start.
        let components: &[u32; 4] = self.as_ref();
        Simdu32x4::load(&components[..], 0)
    }
 }
 // SIMD operator impls for `Vector4<u32>`, generated by `impl_operator_simd!`.
 // NOTE(review): the macro definition is not visible here. Presumably it converts the
 // operands to the bracketed SIMD type before evaluating the body, and `@rs` marks a
 // scalar right-hand side — confirm against `impl_operator_simd!` in macros.rs.
 // No SIMD `Neg` or `Div` is provided for `Vector4<u32>` in this section.

 // Vector4<u32> + Vector4<u32>
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdu32x4]; Add<Vector4<u32>> for Vector4<u32> {
        fn add(lhs, rhs) -> Vector4<u32> {
            (lhs + rhs).into()
        }
    }
 }
 // Vector4<f32> + Vector4<f32>
 // NOTE(review): this is the f32 `Add` impl; it sits among the u32 impls rather than
 // next to the other `Vector4<f32>` operators.
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdf32x4]; Add<Vector4<f32>> for Vector4<f32> {
        fn add(lhs, rhs) -> Vector4<f32> {
            (lhs + rhs).into()
        }
    }
 }
 // Vector4<u32> - Vector4<u32>
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{
    [Simdu32x4]; Sub<Vector4<u32>> for Vector4<u32> {
        fn sub(lhs, rhs) -> Vector4<u32> {
            (lhs - rhs).into()
        }
    }
 }
 // Vector4<u32> * u32
 #[cfg(feature = "use_simd")]
 impl_operator_simd!{@rs
    [Simdu32x4]; Mul<u32> for Vector4<u32> {
        fn mul(lhs, rhs) -> Vector4<u32> {
            (lhs * rhs).into()
        }
    }
 }
 #[cfg(feature = "use_simd")]
 impl AddAssign for Vector4<u32> {
    /// Add `rhs` to `self` component by component using one SIMD addition.
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        let lhs_lanes: Simdu32x4 = (*self).into();
        let rhs_lanes: Simdu32x4 = rhs.into();
        *self = Self::from(lhs_lanes + rhs_lanes);
    }
 }
 #[cfg(feature = "use_simd")]
 impl SubAssign for Vector4<u32> {
    /// Subtract `rhs` from `self` component by component using one SIMD subtraction.
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        let lhs_lanes: Simdu32x4 = (*self).into();
        let rhs_lanes: Simdu32x4 = rhs.into();
        *self = Self::from(lhs_lanes - rhs_lanes);
    }
 }
 #[cfg(feature = "use_simd")]
 impl MulAssign<u32> for Vector4<u32> {
    /// Multiply every component of `self` by the scalar `other`.
    ///
    /// The scalar is broadcast to all four lanes so the product is one SIMD multiply.
    // `#[inline]` matches the sibling `AddAssign`/`SubAssign` impls; this impl is not
    // generic, so without the hint it cannot be inlined into downstream crates.
    #[inline]
    fn mul_assign(&mut self, other: u32) {
        let s: Simdu32x4 = (*self).into();
        let other = Simdu32x4::splat(other);
        *self = (s * other).into();
    }
 }
 #[cfg(test)]
 mod tests {
    mod vector2 {
@ -729,7 +1330,12 @@ mod tests {
    mod vector4 {
        use vector::*;
-        const VECTOR4: Vector4<i32> = Vector4 { x: 1, y: 2, z: 3, w: 4 };
+        const VECTOR4: Vector4<i32> = Vector4 {
            x: 1,
            y: 2,
            z: 3,
            w: 4,
        };
        #[test]
        fn test_index() {
@ -796,11 +1402,11 @@ mod tests {
        fn test_as_mut() {
            let mut v = VECTOR4;
            {
-                let v: &mut[i32; 4] = v.as_mut();
+                let v: &mut [i32; 4] = v.as_mut();
                assert_eq!(v, &mut [1, 2, 3, 4]);
            }
            {
-                let v: &mut(i32, i32, i32, i32) = v.as_mut();
+                let v: &mut (i32, i32, i32, i32) = v.as_mut();
                assert_eq!(v, &mut (1, 2, 3, 4));
            }
        }
--- a/tests/quaternion.rs
+++ b/tests/quaternion.rs
@ -194,13 +194,13 @@ mod rotate_from_euler {
        let vec = vec3(0.0, 1.0, 0.0);
        let rot = Quaternion::from(Euler::new(Deg(90.0), Deg(90.0), Deg(0.0)));
-        assert_ulps_eq!(vec3(0.0, 0.0, 1.0), rot * vec);
+        assert_ulps_eq!(vec3(0.0f32, 0.0f32, 1.0f32), rot * vec);
    }
    // tests that the Z rotation is done after the Y
    #[test]
    fn test_y_then_z() {
-        let vec = vec3(0.0, 0.0, 1.0);
+        let vec = vec3(0.0f32, 0.0f32, 1.0f32);
        let rot = Quaternion::from(Euler::new(Deg(0.0), Deg(90.0), Deg(90.0)));
        assert_ulps_eq!(vec3(1.0, 0.0, 0.0), rot * vec);
--- a/tests/vectorf32.rs
+++ b/tests/vectorf32.rs
@ -0,0 +1,267 @@
 // Copyright 2013-2014 The CGMath Developers. For a full listing of the authors,
 // refer to the Cargo.toml file at the top-level directory of this distribution.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #[macro_use]
 extern crate approx;
 #[macro_use]
 extern crate cgmath;
 use cgmath::*;
 use std::f32;
 /// The `vecN` shorthand functions must build the same value as `VectorN::new`.
 #[test]
 fn test_constructor() {
    let short2 = vec2(1f32, 2f32);
    assert_eq!(short2, Vector2::new(1f32, 2f32));

    let short3 = vec3(1f32, 2f32, 3f32);
    assert_eq!(short3, Vector3::new(1f32, 2f32, 3f32));

    let short4 = vec4(1f32, 2f32, 3f32, 4f32);
    assert_eq!(short4, Vector4::new(1f32, 2f32, 3f32, 4f32));
 }
 /// `from_value` must copy the given scalar into every component.
 #[test]
 fn test_from_value() {
    let filled2 = Vector2::from_value(102f32);
    assert_eq!(filled2, Vector2::new(102f32, 102f32));

    let filled3 = Vector3::from_value(22f32);
    assert_eq!(filled3, Vector3::new(22f32, 22f32, 22f32));

    let filled4 = Vector4::from_value(76.5f32);
    assert_eq!(filled4, Vector4::new(76.5f32, 76.5f32, 76.5f32, 76.5f32));
 }
 // Checks `vector + vector` for `$VectorN`: the sum must equal the field-by-field sum,
 // and every by-reference operand combination must agree with the by-value result.
 // `$s` is accepted for a uniform call shape with the other `impl_test_*` macros but is
 // not used in the expansion. `$v` is expanded several times, so it should be a cheap,
 // side-effect-free expression.
 macro_rules! impl_test_add {
    ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => (
        // vector + vector ops
        assert_eq!($v + $v, $VectorN::new($($v.$field + $v.$field),+));
        assert_eq!(&$v + &$v, $v + $v);
        assert_eq!(&$v + $v, $v + $v);
        assert_eq!($v + &$v, $v + $v);
    )
 }
 // Checks `vector - vector` for `$VectorN`: the difference must equal the
 // field-by-field difference, and every by-reference operand combination must agree
 // with the by-value result.
 // `$s` is accepted for a uniform call shape with the other `impl_test_*` macros but is
 // not used in the expansion. `$v` is expanded several times, so it should be a cheap,
 // side-effect-free expression.
 macro_rules! impl_test_sub {
    ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => (
        // vector - vector ops
        assert_eq!($v - $v, $VectorN::new($($v.$field - $v.$field),+));
        assert_eq!(&$v - &$v, $v - $v);
        assert_eq!(&$v - $v, $v - $v);
        assert_eq!($v - &$v, $v - $v);
    )
 }
 // Checks scalar multiplication for `$VectorN` with scalar `$s` and vector `$v`:
 // both `vector * scalar` and `scalar * vector` must equal the field-by-field product,
 // the by-reference forms must agree with the by-value forms, and the two operand
 // orders must give the same result.
 macro_rules! impl_test_mul {
    ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => (
        // vector * scalar ops
        assert_eq!($v * $s, $VectorN::new($($v.$field * $s),+));
        assert_eq!($s * $v, $VectorN::new($($s * $v.$field),+));
        assert_eq!(&$v * $s, $v * $s);
        assert_eq!($s * &$v, $s * $v);
        // commutativity
        assert_eq!($v * $s, $s * $v);
    )
 }
 // Checks scalar division for `$VectorN` with scalar `$s` and vector `$v`:
 // `vector / scalar` divides every field by the scalar, `scalar / vector` divides the
 // scalar by every field, and the by-reference forms must agree with the by-value forms.
 macro_rules! impl_test_div {
    ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => (
        // vector / scalar ops
        assert_eq!($v / $s, $VectorN::new($($v.$field / $s),+));
        assert_eq!($s / $v, $VectorN::new($($s / $v.$field),+));
        assert_eq!(&$v / $s, $v / $s);
        assert_eq!($s / &$v, $s / $v);
    )
 }
 // Checks the scalar remainder for `$VectorN` with scalar `$s` and vector `$v`:
 // `vector % scalar` takes every field modulo the scalar, `scalar % vector` takes the
 // scalar modulo every field, and the by-reference forms must agree with the by-value
 // forms.
 macro_rules! impl_test_rem {
    ($VectorN:ident { $($field:ident),+ }, $s:expr, $v:expr) => (
        // vector % scalar ops
        assert_eq!($v % $s, $VectorN::new($($v.$field % $s),+));
        assert_eq!($s % $v, $VectorN::new($($s % $v.$field),+));
        assert_eq!(&$v % $s, $v % $s);
        assert_eq!($s % &$v, $s % $v);
    )
 }
 /// Vector addition for every vector size.
 ///
 /// `Vector4` exercises the SIMD path when `use_simd` is enabled; `Vector3` and
 /// `Vector2` are covered as well so this test matches `test_sub`, `test_mul`,
 /// `test_div` and `test_rem`.
 #[test]
 fn test_add() {
    impl_test_add!(Vector4 { x, y, z, w }, 2.0f32, vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32));
    impl_test_add!(Vector3 { x, y, z }, 2.0f32, vec3(2.0f32, 4.0f32, 6.0f32));
    impl_test_add!(Vector2 { x, y }, 2.0f32, vec2(2.0f32, 4.0f32));
 }
 /// Vector subtraction for every vector size.
 #[test]
 fn test_sub() {
    let scalar = 2.0f32;
    let v4 = vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32);
    let v3 = vec3(2.0f32, 4.0f32, 6.0f32);
    let v2 = vec2(2.0f32, 4.0f32);

    impl_test_sub!(Vector4 { x, y, z, w }, scalar, v4);
    impl_test_sub!(Vector3 { x, y, z }, scalar, v3);
    impl_test_sub!(Vector2 { x, y }, scalar, v2);
 }
 /// Scalar multiplication for every vector size.
 #[test]
 fn test_mul() {
    let scalar = 2.0f32;
    let v4 = vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32);
    let v3 = vec3(2.0f32, 4.0f32, 6.0f32);
    let v2 = vec2(2.0f32, 4.0f32);

    impl_test_mul!(Vector4 { x, y, z, w }, scalar, v4);
    impl_test_mul!(Vector3 { x, y, z }, scalar, v3);
    impl_test_mul!(Vector2 { x, y }, scalar, v2);
 }
 /// Scalar division for every vector size.
 #[test]
 fn test_div() {
    let scalar = 2.0f32;
    let v4 = vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32);
    let v3 = vec3(2.0f32, 4.0f32, 6.0f32);
    let v2 = vec2(2.0f32, 4.0f32);

    impl_test_div!(Vector4 { x, y, z, w }, scalar, v4);
    impl_test_div!(Vector3 { x, y, z }, scalar, v3);
    impl_test_div!(Vector2 { x, y }, scalar, v2);
 }
 /// Scalar remainder for every vector size.
 #[test]
 fn test_rem() {
    let scalar = 2.0f32;
    let v4 = vec4(2.0f32, 4.0f32, 6.0f32, 8.0f32);
    let v3 = vec3(2.0f32, 4.0f32, 6.0f32);
    let v2 = vec2(2.0f32, 4.0f32);

    impl_test_rem!(Vector4 { x, y, z, w }, scalar, v4);
    impl_test_rem!(Vector3 { x, y, z }, scalar, v3);
    impl_test_rem!(Vector2 { x, y }, scalar, v2);
 }
 /// Dot products of small integer-valued vectors.
 #[test]
 fn test_dot() {
    // 1*3 + 2*4
    let dot2 = Vector2::new(1.0f32, 2.0f32).dot(Vector2::new(3.0f32, 4.0f32));
    assert_eq!(dot2, 11.0f32);

    // 1*4 + 2*5 + 3*6
    let dot3 = Vector3::new(1.0f32, 2.0f32, 3.0f32).dot(Vector3::new(4.0f32, 5.0f32, 6.0f32));
    assert_eq!(dot3, 32.0f32);

    // 1*5 + 2*6 + 3*7 + 4*8
    let dot4 = Vector4::new(1.0f32, 2.0f32, 3.0f32, 4.0f32)
        .dot(Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32));
    assert_eq!(dot4, 70.0f32);
 }
 /// `sum` adds up all components of a vector.
 #[test]
 fn test_sum() {
    // Literals written without a fractional part.
    let (a2, a3, a4) = (
        Vector2::new(1f32, 2f32),
        Vector3::new(1f32, 2f32, 3f32),
        Vector4::new(1f32, 2f32, 3f32, 4f32),
    );
    assert_eq!(a2.sum(), 3f32);
    assert_eq!(a3.sum(), 6f32);
    assert_eq!(a4.sum(), 10f32);

    // Literals written with a fractional part.
    let (b2, b3, b4) = (
        Vector2::new(3.0f32, 4.0f32),
        Vector3::new(4.0f32, 5.0f32, 6.0f32),
        Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32),
    );
    assert_eq!(b2.sum(), 7.0f32);
    assert_eq!(b3.sum(), 15.0f32);
    assert_eq!(b4.sum(), 26.0f32);
 }
 /// `product` multiplies all components of a vector together.
 #[test]
 fn test_product() {
    // Literals written without a fractional part.
    let (a2, a3, a4) = (
        Vector2::new(1f32, 2f32),
        Vector3::new(1f32, 2f32, 3f32),
        Vector4::new(1f32, 2f32, 3f32, 4f32),
    );
    assert_eq!(a2.product(), 2f32);
    assert_eq!(a3.product(), 6f32);
    assert_eq!(a4.product(), 24f32);

    // Literals written with a fractional part.
    let (b2, b3, b4) = (
        Vector2::new(3.0f32, 4.0f32),
        Vector3::new(4.0f32, 5.0f32, 6.0f32),
        Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32),
    );
    assert_eq!(b2.product(), 12.0f32);
    assert_eq!(b3.product(), 120.0f32);
    assert_eq!(b4.product(), 1680.0f32);
 }
 /// `min` returns the smallest component of a vector.
 #[test]
 fn test_min() {
    // Literals written without a fractional part.
    let (a2, a3, a4) = (
        Vector2::new(1f32, 2f32),
        Vector3::new(1f32, 2f32, 3f32),
        Vector4::new(1f32, 2f32, 3f32, 4f32),
    );
    assert_eq!(a2.min(), 1f32);
    assert_eq!(a3.min(), 1f32);
    assert_eq!(a4.min(), 1f32);

    // Literals written with a fractional part.
    let (b2, b3, b4) = (
        Vector2::new(3.0f32, 4.0f32),
        Vector3::new(4.0f32, 5.0f32, 6.0f32),
        Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32),
    );
    assert_eq!(b2.min(), 3.0f32);
    assert_eq!(b3.min(), 4.0f32);
    assert_eq!(b4.min(), 5.0f32);
 }
 /// `max` returns the largest component of a vector.
 #[test]
 fn test_max() {
    // Literals written without a fractional part.
    let (a2, a3, a4) = (
        Vector2::new(1f32, 2f32),
        Vector3::new(1f32, 2f32, 3f32),
        Vector4::new(1f32, 2f32, 3f32, 4f32),
    );
    assert_eq!(a2.max(), 2f32);
    assert_eq!(a3.max(), 3f32);
    assert_eq!(a4.max(), 4f32);

    // Literals written with a fractional part.
    let (b2, b3, b4) = (
        Vector2::new(3.0f32, 4.0f32),
        Vector3::new(4.0f32, 5.0f32, 6.0f32),
        Vector4::new(5.0f32, 6.0f32, 7.0f32, 8.0f32),
    );
    assert_eq!(b2.max(), 4.0f32);
    assert_eq!(b3.max(), 6.0f32);
    assert_eq!(b4.max(), 8.0f32);
 }
 /// Cross product of (1, 2, 3) and (4, 5, 6) is (-3, 6, -3).
 #[test]
 fn test_cross() {
    let lhs = Vector3::new(1f32, 2f32, 3f32);
    let rhs = Vector3::new(4f32, 5f32, 6f32);
    let expected = Vector3::new(-3f32, 6f32, -3f32);

    let product = lhs.cross(rhs);
    assert_eq!(product, expected);
 }
 /// Distinct axis-aligned unit vectors must be reported as perpendicular.
 #[test]
 fn test_is_perpendicular() {
    let x2 = Vector2::new(1.0f32, 0.0f32);
    let y2 = Vector2::new(0.0f32, 1.0f32);
    assert!(x2.is_perpendicular(y2));

    let y3 = Vector3::new(0.0f32, 1.0f32, 0.0f32);
    let z3 = Vector3::new(0.0f32, 0.0f32, 1.0f32);
    assert!(y3.is_perpendicular(z3));

    let x4 = Vector4::new(1.0f32, 0.0f32, 0.0f32, 0.0f32);
    let w4 = Vector4::new(0.0f32, 0.0f32, 0.0f32, 1.0f32);
    assert!(x4.is_perpendicular(w4));
 }
 /// Magnitude tests. Every input has integer components and an integer length, so the
 /// results are compared with exact equality.
 #[cfg(test)]
 mod test_magnitude {
    use cgmath::*;
    #[test]
    fn test_vector2() {
        // Pythagorean triples: (3, 4, 5) and (5, 12, 13).
        let cases = [
            (Vector2::new(3.0f32, 4.0f32), 5.0f32),
            (Vector2::new(5.0f32, 12.0f32), 13.0f32),
        ];
        for &(v, len) in cases.iter() {
            assert_eq!(v.magnitude2(), len * len);
        }
        for &(v, len) in cases.iter() {
            assert_eq!(v.magnitude(), len);
        }
    }
    #[test]
    fn test_vector3() {
        // Pythagorean quadruples: (2, 3, 6, 7) and (1, 4, 8, 9).
        let cases = [
            (Vector3::new(2.0f32, 3.0f32, 6.0f32), 7.0f32),
            (Vector3::new(1.0f32, 4.0f32, 8.0f32), 9.0f32),
        ];
        for &(v, len) in cases.iter() {
            assert_eq!(v.magnitude2(), len * len);
        }
        for &(v, len) in cases.iter() {
            assert_eq!(v.magnitude(), len);
        }
    }
    #[test]
    fn test_vector4() {
        // Pythagorean quintuples: (1, 2, 4, 10, 11) and (1, 2, 8, 10, 13).
        let cases = [
            (Vector4::new(1.0f32, 2.0f32, 4.0f32, 10.0f32), 11.0f32),
            (Vector4::new(1.0f32, 2.0f32, 8.0f32, 10.0f32), 13.0f32),
        ];
        for &(v, len) in cases.iter() {
            assert_eq!(v.magnitude2(), len * len);
        }
        for &(v, len) in cases.iter() {
            assert_eq!(v.magnitude(), len);
        }
        // The element-wide helpers only exist when SIMD support is compiled in.
        #[cfg(feature = "use_simd")]
        {
            let squares = Vector4::new(1f32, 4f32, 9f32, 16f32);
            let roots = Vector4::new(1f32, 2f32, 3f32, 4f32);
            let inverse_roots = Vector4::new(1f32, 1f32/2f32, 1f32/3f32, 1f32/4f32);
            assert_ulps_eq!(squares.sqrt_element_wide(), roots);
            // The reciprocal helpers are compared with a relative tolerance.
            assert_relative_eq!(squares.sqrt_element_wide().recip_element_wide(), inverse_roots, max_relative = 0.005f32);
            assert_relative_eq!(squares.rsqrt_element_wide(), inverse_roots, max_relative = 0.005f32);
        }
    }
 }
 /// Angles between pairs of vectors, for every vector size.
 #[test]
 fn test_angle() {
    let half_pi = f32::consts::FRAC_PI_2;
    let third_pi = f32::consts::FRAC_PI_3;

    // Vector2: the last case expects a negative angle.
    assert_ulps_eq!(Vector2::new(1.0f32, 0.0f32).angle(Vector2::new(0.0f32, 1.0f32)), &Rad(half_pi));
    assert_ulps_eq!(Vector2::new(10.0f32, 0.0f32).angle(Vector2::new(0.0f32, 5.0f32)), &Rad(half_pi));
    assert_ulps_eq!(Vector2::new(-1.0f32, 0.0f32).angle(Vector2::new(0.0f32, 1.0f32)), &-Rad(half_pi));

    // Vector3
    assert_ulps_eq!(Vector3::new(1.0f32, 0.0f32, 1.0f32).angle(Vector3::new(1.0f32, 1.0f32, 0.0f32)), &Rad(third_pi));
    assert_ulps_eq!(Vector3::new(10.0f32, 0.0f32, 10.0f32).angle(Vector3::new(5.0f32, 5.0f32, 0.0f32)), &Rad(third_pi));
    assert_ulps_eq!(Vector3::new(-1.0f32, 0.0f32, -1.0f32).angle(Vector3::new(1.0f32, -1.0f32, 0.0f32)), &Rad(2.0f32 * third_pi));

    // Vector4
    assert_ulps_eq!(Vector4::new(1.0f32, 0.0f32, 1.0f32, 0.0f32).angle(Vector4::new(0.0f32, 1.0f32, 0.0f32, 1.0f32)), &Rad(half_pi));
    assert_ulps_eq!(Vector4::new(10.0f32, 0.0f32, 10.0f32, 0.0f32).angle(Vector4::new(0.0f32, 5.0f32, 0.0f32, 5.0f32)), &Rad(half_pi));
    assert_ulps_eq!(Vector4::new(-1.0f32, 0.0f32, -1.0f32, 0.0f32).angle(Vector4::new(0.0f32, 1.0f32, 0.0f32, 1.0f32)), &Rad(half_pi));
 }
 /// `normalize` must divide every component by the vector's length.
 #[test]
 fn test_normalize() {
    // TODO: test normalize_to, normalize_self, and normalize_self_to
    // Length 5.
    let unit2 = Vector2::new(3.0f32, 4.0f32).normalize();
    assert_ulps_eq!(unit2, &Vector2::new(3.0f32/5.0f32, 4.0f32/5.0f32));

    // Length 7.
    let unit3 = Vector3::new(2.0f32, 3.0f32, 6.0f32).normalize();
    assert_ulps_eq!(unit3, &Vector3::new(2.0f32/7.0f32, 3.0f32/7.0f32, 6.0f32/7.0f32));

    // Length 11.
    let unit4 = Vector4::new(1.0f32, 2.0f32, 4.0f32, 10.0f32).normalize();
    assert_ulps_eq!(unit4, &Vector4::new(1.0f32/11.0f32, 2.0f32/11.0f32, 4.0f32/11.0f32, 10.0f32/11.0f32));
 }
 // Checks that `cast()` on each vector size yields a vector approximately equal to one
 // built from the same literals.
 // NOTE(review): both sides are built from `f32` literals, so this looks like an
 // f32 -> f32 cast rather than a conversion between scalar types — confirm that this
 // is the intended coverage.
 #[test]
 fn test_cast() {
    assert_ulps_eq!(Vector2::new(0.9f32, 1.5).cast(), Vector2::new(0.9f32, 1.5));
    assert_ulps_eq!(Vector3::new(1.0f32, 2.4, -3.13).cast(), Vector3::new(1.0f32, 2.4, -3.13));
    assert_ulps_eq!(Vector4::new(13.5f32, -4.6, -8.3, 2.41).cast(), Vector4::new(13.5f32, -4.6, -8.3, 2.41));
 }