Replies: 2 comments
-
| extern crate ndarray_linalg;
extern crate netlib_src;
use criterion::{Benchmark, Criterion, black_box, criterion_group, criterion_main};
use ndarray::{arr1, Array1};
use nalgebra::Vector3;
use rand::random;
fn benchmark(c: &mut Criterion) {
    c.bench(
        "dot",
        Benchmark::new("ndarray", |b| {
            let b1: Array1<f64> = Array1::from_vec(vec![random(), random(), random()]);
            let b2: Array1<f64> = Array1::from_vec(vec![random(), random(), random()]);
            b.iter(|| black_box(b1.dot(&b2)));
        })
        .with_function("nalgebra", |b| {
            let b1 = Vector3::<f64>::new(random(), random(), random());
            let b2 = Vector3::<f64>::new(random(), random(), random());
            b.iter(|| black_box(b1.dot(&b2)));
        })
    );
}
criterion_group!(benches, benchmark);
criterion_main!(benches);I ran the benchmark, and got the following result dot/ndarray             time:   [7.3657 ns 7.4323 ns 7.5013 ns]
                        change: [-1.9526% -0.4306% +1.1328%] (p = 0.60 > 0.05)
                        No change in performance detected.
Found 4 outliers among 100 measurements (4.00%)
  3 (3.00%) high mild
  1 (1.00%) high severe
dot/nalgebra            time:   [684.51 ps 692.21 ps 700.52 ps]
                        change: [+0.8324% +2.5543% +4.2500%] (p = 0.00 < 0.05)
                        Change within noise threshold.
Found 4 outliers among 100 measurements (4.00%)
  2 (2.00%) high mild
  2 (2.00%) high severeAfter I made all the relevant (for this particular case)  dot/ndarray             time:   [1.9718 ns 1.9925 ns 2.0154 ns]
                        change: [-73.509% -73.050% -72.590%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
  4 (4.00%) high mild
  1 (1.00%) high severe
dot/nalgebra            time:   [695.66 ps 702.03 ps 708.21 ps]
                        change: [-2.1110% +0.1197% +2.5369%] (p = 0.93 > 0.05)
                        No change in performance detected.
Found 7 outliers among 100 measurements (7.00%)
  3 (3.00%) high mild
  4 (4.00%) high severeOnce I even get 890ps or so by more aggressively commenting out the code and leaving only logic required for  Anyway here is a patch if you'd like to reproduce the results diff --git a/src/linalg/impl_linalg.rs b/src/linalg/impl_linalg.rs
index a40f386..7e06e13 100644
--- a/src/linalg/impl_linalg.rs
+++ b/src/linalg/impl_linalg.rs
@@ -56,6 +56,7 @@ where
     /// **Panics** if the array shapes are incompatible.<br>
     /// *Note:* If enabled, uses blas `dot` for elements of `f32, f64` when memory
     /// layout allows.
+    #[inline(always)]
     pub fn dot<Rhs>(&self, rhs: &Rhs) -> <Self as Dot<Rhs>>::Output
     where
         Self: Dot<Rhs>,
@@ -63,6 +64,7 @@ where
         Dot::dot(self, rhs)
     }
+    #[inline(always)]
     fn dot_generic<S2>(&self, rhs: &ArrayBase<S2, Ix1>) -> A
     where
         S2: Data<Elem = A>,
@@ -72,6 +74,31 @@ where
         assert!(self.len() == rhs.len());
         if let Some(self_s) = self.as_slice() {
             if let Some(rhs_s) = rhs.as_slice() {
+                let vector_len = self_s.len();
+                if vector_len == 2 {
+                    return unsafe {
+                        let a = *self_s.get_unchecked(0) * *rhs_s.get_unchecked(0);
+                        let b = *self_s.get_unchecked(1) * *rhs_s.get_unchecked(1);
+                        a + b
+                    };
+                } else if vector_len == 3 {
+                    return unsafe {
+                        let a = *self_s.get_unchecked(0) * *rhs_s.get_unchecked(0);
+                        let b = *self_s.get_unchecked(1) * *rhs_s.get_unchecked(1);
+                        let c = *self_s.get_unchecked(2) * *rhs_s.get_unchecked(2);
+                        a + b + c
+                    };
+                } else if vector_len == 4 {
+                    return unsafe {
+                        let mut a = *self_s.get_unchecked(0) * *rhs_s.get_unchecked(0);
+                        let mut b = *self_s.get_unchecked(1) * *rhs_s.get_unchecked(1);
+                        let c = *self_s.get_unchecked(2) * *rhs_s.get_unchecked(2);
+                        let d = *self_s.get_unchecked(3) * *rhs_s.get_unchecked(3);
+                        a = a + c;
+                        b = b + d;
+                        a + b
+                    };
+                }
                 return numeric_util::unrolled_dot(self_s, rhs_s);
             }
         }
@@ -94,6 +121,7 @@ where
     }
     #[cfg(feature = "blas")]
+    #[inline(always)]
     fn dot_impl<S2>(&self, rhs: &ArrayBase<S2, Ix1>) -> A
     where
         S2: Data<Elem = A>,
@@ -182,6 +210,7 @@ where
     /// **Panics** if the arrays are not of the same length.<br>
     /// *Note:* If enabled, uses blas `dot` for elements of `f32, f64` when memory
     /// layout allows.
+    #[inline(always)]
     fn dot(&self, rhs: &ArrayBase<S2, Ix1>) -> A {
         self.dot_impl(rhs)
     } | 
Beta Was this translation helpful? Give feedback.
-
| Hi Rolf, While of course it's welcome to improve upon this — it should not be done at the expense of our core competency which is handling big arrays. Do you have an array of 100 elements or 10000 elements? Then it's our game. | 
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
I made a comparison between ndarray and nalgebra dot products of 3d-Vectors. The performance difference is very large whith ndarray being 12 times slower than nalgebra.
Please find the code below. Did I do anything wrong? I would like to create a python module that calls rust functions and as far as I have seen this is easily possible with ndarray but not nalgebra.
Thanks in advance
Rolf
output:
sum1 = 3599999.999718683 dt1 = 114209136
sum2 = 3599999.999718683 dt2 = 8604722
main.rs:
#![allow(non_snake_case)]
#![allow(unused)]
#[macro_use]
extern crate ndarray;
extern crate nalgebra;
use ndarray::prelude::*;
use ndarray::{arr1, arr2, Array1, Array2};
use nalgebra::Vector3;
use std::time::{Duration, Instant};
fn main() {
let n = 10000000;
let mut sum1 = 0.0;
let b1 = arr1(&[1.0, 0.2, 0.3]);
let b2 = arr1(&[0.1, 1.0, 0.2]);
let t1 = Instant::now();
for i in 0..n {
sum1 += b1.dot(&b2);
}
let dt1 = t1.elapsed().as_nanos();
println!("sum1 = {} dt1 = {}", sum1, dt1);
}
Cargo.toml:
[package]
name = "dottest"
version = "0.1.0"
authors = ["ropewe56"]
edition = "2018"
[dependencies]
ndarray = "0.12.1"
nalgebra = "0.18.0"
Beta Was this translation helpful? Give feedback.
All reactions