Scientific Computing, TU Berlin, WS 2019/2020, Lecture 08
Jürgen Fuhrmann, WIAS Berlin
.jl
Write a Julia program which calculates $\sum_{n=1}^K \frac1{n^2}$ for $K=10,100,1000,10000, 100000$ and report the values for \verb|Float16,Float32, Float64|. Compare the results to the value of $\sum_{n=1}^\infty \frac1{n^2}$
We have $\sum_{n=1}^\infty \frac1{n^2}=\frac{\pi^2}{6}.$
# Compute the partial Basel sum ∑_{k=1}^n 1/k² in floating-point type T,
# accumulating the terms in forward (large-to-small) order.
function baselsum(T::Type, n)
    acc = zero(T)
    unit = one(T)
    for i = 1:n
        xi = T(i)
        acc += unit / (xi * xi)
    end
    return acc
end
# Absolute deviation of the forward partial sum from the limit π²/6.
function baselerror(T::Type, n)
    return abs(baselsum(T, n) - pi^2 / 6)
end
# Forward summation error for 10000 terms in each precision.
# Lower precision saturates: once terms fall below the accumulator's
# rounding threshold they no longer change the sum.
@show baselerror(Float16,10000)
@show baselerror(Float32,10000)
@show baselerror(Float64,10000)
@show baselerror(BigFloat,10000)
using BenchmarkTools
# Benchmark the forward summation in each precision.
# BigFloat is heap-allocated software arithmetic and far slower than
# the hardware floating-point types.
@btime baselerror(Float16,10000)
@btime baselerror(Float32,10000)
@btime baselerror(Float64,10000)
@btime baselerror(BigFloat,10000)
# Partial Basel sum accumulated in reverse (small-to-large) order,
# which reduces rounding error compared to the forward sweep.
function rbaselsum(T::Type, n)
    acc = zero(T)
    unit = one(T)
    for i in reverse(1:n)
        xi = T(i)
        acc += unit / (xi * xi)
    end
    return acc
end
# Absolute deviation of the reverse-order partial sum from π²/6.
function rbaselerror(T::Type, n)
    return abs(rbaselsum(T, n) - pi^2 / 6)
end
# Reverse summation: adding the small terms first keeps them from being
# lost against a large accumulator, improving the result noticeably.
@show rbaselerror(Float16,10000)
@show rbaselerror(Float32,10000)
@show rbaselerror(Float64,10000)
@show rbaselerror(BigFloat,10000)
# Partial Basel sum with Kahan compensated summation: a running
# compensation term captures the low-order bits lost in each addition.
# The exact sequence of floating-point operations of the textbook
# algorithm is preserved.
function kbaselsum(T::Type, n)
    acc = zero(T)
    comp = zero(T)   # running compensation for lost low-order bits
    unit = one(T)
    for i = 1:n
        xi = T(i)
        term = unit / (xi * xi)
        y = term - comp          # corrected increment
        t = acc + y              # candidate new sum
        comp = (t - acc) - y     # what was lost in the addition
        acc = t
    end
    return acc
end
# Absolute deviation of the Kahan-compensated partial sum from π²/6.
function kbaselerror(T::Type, n)
    return abs(kbaselsum(T, n) - pi^2 / 6)
end
# Kahan summation: compensated accumulation recovers nearly the full
# precision of the underlying floating-point type.
@show kbaselerror(Float16,10000)
@show kbaselerror(Float32,10000)
@show kbaselerror(Float64,10000)
@show kbaselerror(BigFloat,10000)
Plotting...
using Plots
# Log-log plot of the summation error vs. number of terms N for
# forward (+), reverse (x) and Kahan (o) summation in Float32/Float64.
Ns=[10^i for i=1:9]
p=plot(xlabel="N",ylabel="error",xaxis=:log, yaxis=:log,legend=:bottomleft)
plot!(p,Ns,baselerror.(Float32,Ns),label="Float32",markershape=:+,color=:red)
plot!(p,Ns,baselerror.(Float64,Ns),label="Float64",markershape=:+,color=:green)
plot!(p,Ns,rbaselerror.(Float32,Ns),label="Float32,rev",markershape=:x,color=:red)
plot!(p,Ns,rbaselerror.(Float64,Ns),label="Float64,rev",markershape=:x,color=:green)
plot!(p,Ns,kbaselerror.(Float32,Ns),label="Float32,kahan",markershape=:circle,color=:red)
plot!(p,Ns,kbaselerror.(Float64,Ns),label="Float64,kahan",markershape=:circle,color=:green)
Left boundary condition: \begin{align*} -u'(0)+ \alpha u(0) &=0\\ C+\alpha D&=0\\ C&=-\alpha D \end{align*}
Right boundary condition:
# Exact solution u(x) = (-x² + x + 1/α)/2 of the boundary value problem.
function u(x, alpha)
    return 0.5 * (-x * x + x + 1 / alpha)
end
# Exact solution sampled on an equidistant N-point grid on [0,1].
function u_exact(N, alpha)
    X = collect(0:1/(N-1):1)
    return u.(X, alpha)
end
# Assemble the tridiagonal system for the discretized problem on an
# N-point grid with Robin boundary parameter alpha.
# Returns (a, b, c, f): subdiagonal, diagonal, superdiagonal, right-hand side.
function setup(N, alpha)
    h = 1.0 / (N - 1)
    a = fill(-1 / h, N - 1)
    b = fill(2 / h, N)
    c = fill(-1 / h, N - 1)
    f = fill(h, N)
    # Robin boundary conditions modify the first and last diagonal entries,
    # and the boundary control volumes are half-size.
    b[1] = alpha + 1 / h
    b[N] = alpha + 1 / h
    f[1] = h / 2
    f[N] = h / 2
    return a, b, c, f
end
Correctness check
# Euclidean norm of the difference between the solver's discrete
# solution and the exact solution on the same grid.
function check(N, alpha, solver)
    return norm(solver(setup(N, alpha)...) - u_exact(N, alpha))
end
Setup tools
using LinearAlgebra
using SparseArrays
Progonka (c) Daniel Kind, Alon Cohn
Returns the solution u of the tridiagonal system defined by a,b,c,f where
# Thomas algorithm ("progonka") for the tridiagonal system
#   b[1]u[1] + c[1]u[2]                    = f[1]
#   a[i-1]u[i-1] + b[i]u[i] + c[i]u[i+1]   = f[i],  i = 2..N-1
#   a[N-1]u[N-1] + b[N]u[N]                = f[N]
# Returns the solution vector u.
function progonka(a, b, c, f)
    n = length(f)
    T = eltype(a)
    u = Vector{T}(undef, n)
    alp = Vector{T}(undef, n)
    bet = Vector{T}(undef, n)
    # Forward sweep: eliminate the subdiagonal.
    alp[2] = -c[1] / b[1]
    bet[2] = f[1] / b[1]
    for i = 2:n-1
        den = a[i-1] * alp[i] + b[i]
        alp[i+1] = -c[i] / den
        bet[i+1] = (f[i] - a[i-1] * bet[i]) / den
    end
    # Backward sweep: substitute from the last unknown upwards.
    u[n] = (f[n] - a[n-1] * bet[n]) / (a[n-1] * alp[n] + b[n])
    for i = n-1:-1:1
        u[i] = alp[i+1] * u[i+1] + bet[i+1]
    end
    return u
end
Setup data
# Problem data used by the benchmarks below:
# Robin parameter alpha, grid size N, and the assembled system.
alpha=1
N=1000
a,b,c,f=setup(N,alpha)
Check correctness of solution
# The discrete solution should match the exact one up to discretization error.
@show check(N,alpha,progonka)
Benchmark
# Benchmark the pure-Julia Thomas algorithm.
@btime progonka(a,b,c,f);
Progonka in C
# Write the C implementation of progonka to a source file and compile it
# into a shared library that can be called from Julia via ccall.
open("progonka.c", "w") do io
    write(io, """
void progonka(int N,double* u,double* a,double* b,double* c,double* f,double* Alpha,double* Beta)
{
  int i;
  /* Adjust indexing:
     This is C pointer arithmetic. Shifting the start addresses by 1
     allows to keep the indexing from 1.
  */
  u--;
  a--;
  b--;
  c--;
  f--;
  Alpha--;
  Beta--;
  Alpha[2] = -c[1]/b[1];
  Beta[2] = f[1]/b[1];
  for(i=2;i<=N-1;i++)
  {
    Alpha[i+1]=-c[i]/(a[i-1]*Alpha[i]+b[i]);
    Beta[i+1]=(f[i]-a[i-1]*Beta[i])/(a[i-1]*Alpha[i]+b[i]);
  }
  u[N]=(f[N]-a[N-1]*Beta[N])/(a[N-1]*Alpha[N]+b[N]);
  /* 'i--' replaces the original '-i--', which negated the discarded
     value of i-- (same effect, but it reads like a typo). */
  for(i=N-1;i>=1;i--)
  {
    u[i]=Alpha[i+1]*u[i+1]+Beta[i+1];
  }
}
""")
end
# -shared is gcc's documented (single-dash) flag for building a shared
# object; -fPIC generates the position-independent code shared objects
# require on most platforms.
run(`gcc -Ofast -shared -fPIC progonka.c -o progonka.so`)
# Solve the tridiagonal system by calling the compiled C implementation.
# Fix: the system size is now taken from length(f) instead of the
# non-const global N, so the wrapper is correct for any input size.
function cprogonka(a, b, c, f)
    n = length(f)
    u = Vector{eltype(a)}(undef, n)
    # Work arrays for the forward sweep, allocated by the caller so the
    # C routine does no memory management.
    Alpha = Vector{eltype(a)}(undef, n)
    Beta = Vector{eltype(a)}(undef, n)
    ccall((:progonka, "progonka"), Cvoid,
          (Cint, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble},
           Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}),
          n, u, a, b, c, f, Alpha, Beta)
    return u
end
# Verify and benchmark the C implementation.
@show check(N,alpha,cprogonka)
@btime cprogonka(a,b,c,f);
Julia won...
Progonka in Fortran
# Write the Fortran implementation of progonka and compile it into a
# shared library. Fixed-form Fortran (.f) requires statements to start
# in column 7, hence the six leading spaces on every line.
open("fprogonka.f", "w") do io
    write(io, """
      subroutine fprogonka(N,u,a,b,c,f,Alpha,Beta)
      integer*4 i
      integer*4 N
      real*8 u(N),a(N-1),b(N),c(N-1),f(N),Alpha(N),Beta(N)
      Alpha(2) = -c(1)/b(1)
      Beta(2) = f(1)/b(1)
      do i=2,N-1
         Alpha(i+1)=-c(i)/(a(i-1)*Alpha(i)+b(i))
         Beta(i+1)=(f(i)-a(i-1)*Beta(i))/(a(i-1)*Alpha(i)+b(i))
      enddo
      u(N)=(f(N)-a(N-1)*Beta(N))/(a(N-1)*Alpha(N)+b(N))
      do i=N-1,1,-1
         u(i)=Alpha(i+1)*u(i+1)+Beta(i+1)
      enddo
      end
""")
end
# -shared (single dash) and -fPIC as for the C build above.
run(`gfortran -Ofast -shared -fPIC fprogonka.f -o fprogonka.so`)
Note the underscore `_` appended to the function name: Fortran compilers mangle subroutine names this way, so from Julia we must call `:fprogonka_`.
# Solve the tridiagonal system by calling the compiled Fortran routine.
# Fortran passes all arguments by reference, so the scalar size goes in
# as Ref{Int64}. Fix: the size is taken from length(f) instead of the
# non-const global N, so the wrapper works for any input size.
function fprogonka(a, b, c, f)
    n = length(f)
    u = Vector{eltype(a)}(undef, n)
    # Work arrays for the forward sweep.
    Alpha = Vector{eltype(a)}(undef, n)
    Beta = Vector{eltype(a)}(undef, n)
    ccall((:fprogonka_, "fprogonka"), Cvoid,
          (Ref{Int64}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble},
           Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}),
          Ref{Int64}(n), u, a, b, c, f, Alpha, Beta)
    return u
end
# Verify and benchmark the Fortran implementation.
@show check(N,alpha,fprogonka)
@btime fprogonka(a,b,c,f);
Julia won...
Julia tridiagonal solver
# Solve using Julia's built-in LU factorization of a Tridiagonal matrix.
# Fixes: removed the unused u=zeros(N), which depended on the global N;
# replaced the deprecated lu(A, Val(false)) (no pivoting) with the
# default pivoted factorization, which is numerically safer.
function julia_tri(a, b, c, f)
    A = Tridiagonal(a, b, c)
    Alu = lu(A)
    return Alu \ f
end
# Verify and benchmark the built-in tridiagonal solver.
@show check(N,alpha,julia_tri)
@btime julia_tri(a,b,c,f);
$\dots$ only half as fast as progonka
Julia dense matrix
# Solve after converting to a dense matrix: O(N²) storage and O(N³)
# factorization, included for comparison only.
# Fix: removed the unused u=zeros(N), which depended on the global N.
function julia_dense(a, b, c, f)
    A = Matrix(Tridiagonal(a, b, c))
    Alu = lu(A)
    return Alu \ f
end
# Verify and benchmark the dense-matrix solver.
@show check(N,alpha,julia_dense)
@btime julia_dense(a,b,c,f);
This is what we expected: handling of the dense matrix is expensive
Julia dense matrix inverse
# Solve by explicitly forming the (dense) inverse and multiplying —
# demonstrates why inv(A)*f is a bad idea compared to A\f.
# Fix: removed the unused u=zeros(N), which depended on the global N.
function julia_inv(a, b, c, f)
    Ainv = inv(Tridiagonal(a, b, c))
    return Ainv * f
end
# Verify and benchmark the inverse-based solver.
@show check(N,alpha,julia_inv)
@btime julia_inv(a,b,c,f);
Julia sparse matrix
# Solve via a sparse CSC matrix and sparse LU (UMFPACK), including the
# cost of assembling the sparse matrix from the tridiagonal data.
# Fix: removed the unused u=zeros(N), which depended on the global N.
function julia_sparse(a, b, c, f)
    A = SparseMatrixCSC(Tridiagonal(a, b, c))
    Alu = lu(A)
    return Alu \ f
end
# Verify and benchmark the sparse solver (including matrix assembly).
@show check(N,alpha,julia_sparse)
@btime julia_sparse(a,b,c,f);
Julia sparse matrix solver with assembled sparse matrix
# Second method of julia_sparse: solve with an already-assembled sparse
# matrix, so only factorization and solve are measured.
# Fix: removed the unused u=zeros(N), which depended on the global N.
function julia_sparse(A, f)
    Alu = lu(A)
    return Alu \ f
end
# Benchmark only the factorization+solve, with the matrix assembled beforehand.
A=SparseMatrixCSC(Tridiagonal(a,b,c))
@btime julia_sparse(A,f);
Factorizing and solving with `\` each time is faster than storing the LU factorization in between.
# Copy a Tridiagonal matrix entry-by-entry into a SparseMatrixCSC.
# Inserting single entries into CSC is slow because each insertion may
# shift the stored data — this is the naive baseline for the benchmark.
function sparse_csc(M::Tridiagonal)
    n = size(M, 1)
    S = spzeros(n, n)
    # Walk the columns; each column j touches rows j-1, j, j+1,
    # clamped to the matrix bounds.
    for j = 1:n
        for i = max(1, j - 1):min(n, j + 1)
            S[i, j] = M[i, j]
        end
    end
    return S
end
using ExtendableSparse
# Same entry-by-entry assembly as sparse_csc, but through an
# ExtendableSparseMatrix, which buffers insertions in an intermediate
# format; flush! converts the buffered entries into the final CSC matrix.
function sparse_ext(M::Tridiagonal)
    n = size(M, 1)
    E = ExtendableSparseMatrix{Float64, Int64}(n, n)
    # Walk the columns; each column j touches rows j-1, j, j+1,
    # clamped to the matrix bounds.
    for j = 1:n
        for i = max(1, j - 1):min(n, j + 1)
            E[i, j] = M[i, j]
        end
    end
    flush!(E)
    return E.cscmatrix
end
Benchmark them
# Compare the three sparse-assembly strategies: built-in conversion,
# naive entry-wise CSC insertion, and ExtendableSparse buffered insertion.
@btime A=SparseMatrixCSC(Tridiagonal(a,b,c))
@btime A=sparse_csc(Tridiagonal(a,b,c))
@btime A=sparse_ext(Tridiagonal(a,b,c));
using Plots
# Plot the exact solution for several values of the Robin parameter alpha.
# Fix: removed the duplicated p=plot() call — the first plot object was
# created and immediately discarded.
X = collect(0:1/(N-1):1)
p = plot()
plot!(p,X,u_exact(N,1),label="alpha=1")
plot!(p,X,u_exact(N,10),label="alpha=10")
plot!(p,X,u_exact(N,100),label="alpha=100")
plot!(p,X,u_exact(N,10000),label="alpha=10000")
This notebook was generated using Literate.jl.