Julia: Add @inbounds to generic_matvecmul!

Created on 10 Jun 2018  路  3Comments  路  Source: JuliaLang/julia

I'm a bit confused why generic_matvecmul! doesn't use @inbounds for the forlloops, as it makes it 5x faster:

julia> import LinearAlgebra: lapack_size

julia> function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::AbstractVector) where R
           mB = length(B)
           mA, nA = lapack_size(tA, A)
           if mB != nA
               throw(DimensionMismatch("matrix A has dimensions ($mA,$nA), vector B has length $mB"))
           end
           if mA != length(C)
               throw(DimensionMismatch("result C has length $(length(C)), needs length $mA"))
           end

           Astride = size(A, 1)

           if tA == 'T'  # fastest case
               for k = 1:mA
                   aoffs = (k-1)*Astride
                   if mB == 0
                       s = zero(R)
                   else
                       s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
                   end
                   for i = 1:nA
                       s += transpose(A[aoffs+i]) * B[i]
                   end
                   C[k] = s
               end
           elseif tA == 'C'
               for k = 1:mA
                   aoffs = (k-1)*Astride
                   if mB == 0
                       s = zero(R)
                   else
                       s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
                   end
                   for i = 1:nA
                       s += A[aoffs + i]'B[i]
                   end
                   C[k] = s
               end
           else # tA == 'N'
               @inbounds for i = 1:mA
                   if mB == 0
                       C[i] = zero(R)
                   else
                       C[i] = zero(A[i]*B[1] + A[i]*B[1])
                   end
               end
               @inbounds for k = 1:mB
                   aoffs = (k-1)*Astride
                   b = B[k]
                   for i = 1:mA
                       C[i] += A[aoffs + i] * b
                   end
               end
           end
           C
       end
generic_matvecmul! (generic function with 1 method)

julia> n = 1000; A = randn(n,n); x = randn(n); y = randn(n);

julia> @time generic_matvecmul!(y, 'N', A, x);
  0.000530 seconds (4 allocations: 160 bytes)

julia> @time LinearAlgebra.generic_matvecmul!(y, 'N', A, x);
  0.001840 seconds (4 allocations: 160 bytes)
linear algebra performance

Most helpful comment

OK, fine, 3x speedup:

julia> @btime generic_matvecmul!(y, 'N', A, x);
  284.819 渭s (0 allocations: 0 bytes)

julia> @btime LinearAlgebra.generic_matvecmul!(y, 'N', A, x);
  840.900 渭s (0 allocations: 0 bytes)

All 3 comments

Regarding the benchmarks, I imagine you performed warmup runs but omitted them from the above (and are aware of BenchmarkTools)? :)

The conclusion to https://github.com/JuliaLang/julia/issues/20469 suggests that judicious use of @inbounds in such generic methods should be alright.

OK, fine, 3x speedup:

julia> @btime generic_matvecmul!(y, 'N', A, x);
  284.819 渭s (0 allocations: 0 bytes)

julia> @btime LinearAlgebra.generic_matvecmul!(y, 'N', A, x);
  840.900 渭s (0 allocations: 0 bytes)
Was this page helpful?
0 / 5 - 0 ratings

Related issues

wilburtownsend picture wilburtownsend  路  3Comments

sbromberger picture sbromberger  路  3Comments

Keno picture Keno  路  3Comments

ararslan picture ararslan  路  3Comments

omus picture omus  路  3Comments