Debian上Fortran并行计算实操指南

一 环境准备
二 方法一 OpenMP 共享内存并行
!> OpenMP shared-memory example: computes 1**2 + 2**2 + ... + n**2 with a
!> work-shared loop and prints the team size together with the sum.
program main
  use, intrinsic :: iso_fortran_env, only: dp => real64
  use omp_lib, only: omp_get_num_threads
  implicit none

  integer, parameter :: n = 1000000   ! upper bound of the summation
  integer :: i, nthreads
  real(dp) :: s, x

  s = 0.0_dp
  nthreads = 1

  ! s appears only in the reduction clause: a variable may be named in at
  ! most one data-sharing clause, so it must not also be listed as shared.
  ! The loop index i is private automatically inside the work-shared do.
  !$omp parallel default(none) private(x) shared(nthreads) reduction(+:s)

  ! Record the team size from exactly one thread to avoid concurrent writes
  ! to the shared variable; the barrier at "end parallel" publishes it.
  !$omp single
  nthreads = omp_get_num_threads()
  !$omp end single nowait

  !$omp do
  do i = 1, n
    x = real(i, kind=dp)
    s = s + x*x
  end do
  !$omp end do

  !$omp end parallel

  ! Only the initial thread runs outside the parallel region, so no
  ! thread-number guard is needed around the output.
  print '("Threads = ", i0, "Sum = ", f0.0)', nthreads, s
end program main
三 方法二 MPI 分布式内存并行
!> MPI distributed-memory example: every rank sums 1 .. (rank+1)*1000
!> locally, the partial sums are combined on the root rank with MPI_Reduce,
!> and the root prints the sums and the elapsed wall-clock time.
program main
  use mpi_f08
  implicit none

  integer, parameter :: dp = kind(1.0d0)   ! same type as MPI_DOUBLE_PRECISION
  integer, parameter :: root = 0           ! rank that receives and prints the result
  integer, parameter :: chunk = 1000       ! per-rank multiplier for the local range
  type(MPI_Comm) :: comm
  integer :: rank, nprocs, ierr
  real(dp) :: t0, t1

  call MPI_Init(ierr)
  comm = MPI_COMM_WORLD
  call MPI_Comm_rank(comm, rank, ierr)
  call MPI_Comm_size(comm, nprocs, ierr)

  ! MPI_Wtime returns elapsed wall-clock time; cpu_time would report
  ! processor time, which is not the elapsed time of the run.
  t0 = MPI_Wtime()

  ! Example: each process computes the local sum 1 .. (rank+1)*1000.
  block
    integer :: i, local_n
    real(dp) :: local_sum, global_sum

    local_n = (rank + 1) * chunk
    local_sum = 0.0_dp
    global_sum = 0.0_dp   ! only the root receives a value from MPI_Reduce

    do i = 1, local_n
      local_sum = local_sum + real(i, kind=dp)
    end do

    call MPI_Reduce(local_sum, global_sum, 1, MPI_DOUBLE_PRECISION, &
                    MPI_SUM, root, comm, ierr)

    t1 = MPI_Wtime()

    if (rank == root) then
      print '("Rank ", i0, "/", i0, "Local sum = ", f0.0, "Global sum = ", f0.0)', &
        rank, nprocs, local_sum, global_sum
      print '("Elapsed time = ", f0.3, " s")', t1 - t0
    end if
  end block

  call MPI_Finalize(ierr)
end program main
四 多节点与性能优化建议