diff --git a/app/main.f90 b/app/main.f90 index 99ca6ce..67fb9b9 100644 --- a/app/main.f90 +++ b/app/main.f90 @@ -9,14 +9,14 @@ program main associate(input => input_t()) output = output_t(input, matcha(input)) block - double precision, allocatable :: simulated_distribution(:,:) + double precision, allocatable :: simulated_distribution(:,:), frequency_distribution(:) integer, parameter :: freq=2 integer num_cells num_cells = output%my_num_cells() simulated_distribution = output%simulated_distribution() - simulated_distribution(:,freq) = num_cells*simulated_distribution(:,freq) - call co_sum(simulated_distribution(:,freq), result_image=1) + frequency_distribution = num_cells*simulated_distribution(:,freq) ! copy to work around nagfor bug + call co_sum(frequency_distribution, result_image=1) call co_sum(num_cells, result_image=1) if (this_image()==1) simulated_distribution(:,freq) = simulated_distribution(:,freq)/dble(num_cells) end block diff --git a/example/heat-equation.f90 b/example/heat-equation.f90 index 3c3cb5c..2e7624c 100644 --- a/example/heat-equation.f90 +++ b/example/heat-equation.f90 @@ -76,13 +76,14 @@ module subroutine exchange_halo(self) end interface + real, allocatable :: halo_x(:,:)[:] + end module submodule(subdomain_2D_m) subdomain_2D_s use assertions_m, only : assert implicit none - real, allocatable :: halo_x(:,:)[:] real dx_, dy_ integer, parameter :: west=1, east=2 integer my_nx, nx, ny, me, num_subdomains, my_internal_west, my_internal_east @@ -114,7 +115,8 @@ module subroutine exchange_halo(self) self%s_(my_nx, 2:ny-1) = merge(boundary_val, internal_val, me==num_subdomains) ! east subdomain boundary if (allocated(halo_x)) deallocate(halo_x) - allocate(halo_x(west:east, ny)[*]) + !allocate(halo_x(west:east, ny)[*]) + allocate(halo_x(2, ny)[*]) call self%exchange_halo end procedure @@ -131,7 +133,7 @@ module subroutine exchange_halo(self) integer i, j real, allocatable :: halo_west(:), halo_east(:) - call assert(allocated(rhs%s_), "subdomain_2D_t%laplacian: allocated(rhs%s_)") + !call assert(allocated(rhs%s_), "subdomain_2D_t%laplacian: allocated(rhs%s_)") call assert(allocated(halo_x), "subdomain_2D_t%laplacian: allocated(halo_x)") allocate(laplacian_rhs(my_nx, ny)) @@ -168,8 +170,8 @@ module subroutine exchange_halo(self) end procedure module procedure exchange_halo - if (me>1) halo_x(east,:)[me-1] = self%s_(1,:) - if (me1) halo_x(east,:)[me-1] = self%s_(1,:) + !if (me this_image()) if (command_line%argument_present(["--help"])) then @@ -52,20 +52,22 @@ function functional_programming_time() result(system_time) integer(int64) t_start_functional, t_end_functional, clock_rate integer step real system_time - type(subdomain_t) T + type(subdomain_t), save :: T[*] call T%define(side=1., boundary_val=T_boundary, internal_val=T_internal_initial, n=resolution) - call system_clock(t_start_functional) - associate(dt => T%dx()*T%dy()/(4*alpha)) + call system_clock(t_start_functional) + functional_programming: & do step = 1, steps + sync all T = T + dt * alpha * .laplacian. T end do functional_programming + + call system_clock(t_end_functional, clock_rate) end associate - call system_clock(t_end_functional, clock_rate) system_time = real(t_end_functional - t_start_functional)/real(clock_rate) associate(L_infinity_norm => maxval(abs(T%values() - T_steady))) @@ -78,18 +80,22 @@ function procedural_programming_time() result(system_time) integer(int64) t_start_procedural, t_end_procedural, clock_rate integer step real system_time - type(subdomain_t) T + type(subdomain_t), save :: T[*] + + call T%define(side=1., boundary_val=0., internal_val=1., n=resolution) associate(dt => T%dx()*T%dy()/(4*alpha)) - call T%define(side=1., boundary_val=0., internal_val=1., n=resolution) call system_clock(t_start_procedural) + procedural_programming: & do step = 1, steps - call T%step(alpha*dt) + sync all + call march(alpha*dt, T) end do procedural_programming + + call system_clock(t_end_procedural, clock_rate) end associate - call system_clock(t_end_procedural, clock_rate) system_time = real(t_end_procedural - t_start_procedural)/real(clock_rate) associate(L_infinity_norm => maxval(abs(T%values() - T_steady))) diff --git a/src/matcha/distribution_s.F90 b/src/matcha/distribution_s.f90 similarity index 87% rename from src/matcha/distribution_s.F90 rename to src/matcha/distribution_s.f90 index fff76fb..d4d14f4 100644 --- a/src/matcha/distribution_s.F90 +++ b/src/matcha/distribution_s.f90 @@ -47,8 +47,11 @@ pure function monotonically_increasing(f) result(monotonic) "distribution_t%cumulative_distribution: allocated(cumulative_distribution_)") call assert(allocated(self%vel_), "distribution_t%cumulative_distribution: allocated(vel_)") - ! Sample from the distribution - call do_concurrent_sampled_speeds(speeds, self%vel_, self%cumulative_distribution(), sampled_speeds) + ! Sample from the distribution + associate(ncells => size(speeds,1), nsteps => size(speeds,2)) + allocate(sampled_speeds(ncells,nsteps)) + call do_concurrent_sampled_speeds(speeds, self%vel_, self%cumulative_distribution(), sampled_speeds) + end associate associate(nsteps => size(speeds,2)) @@ -63,6 +66,9 @@ pure function monotonically_increasing(f) result(monotonic) end associate end associate + if(allocated(my_velocities)) deallocate(my_velocities) + allocate(my_velocities, mold=dir) + call do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) end associate diff --git a/src/matcha/do_concurrent_m.f90 b/src/matcha/do_concurrent_m.f90 index 21869c1..d28691c 100644 --- a/src/matcha/do_concurrent_m.f90 +++ b/src/matcha/do_concurrent_m.f90 @@ -1,6 +1,6 @@ module do_concurrent_m use iso_c_binding, only : c_double, c_int - use t_cell_collection_m, only : t_cell_collection_t, t_cell_collection_bind_C_t + use t_cell_collection_m, only : t_cell_collection_bind_C_t implicit none private public :: do_concurrent_sampled_speeds, do_concurrent_my_velocities, do_concurrent_k, do_concurrent_speeds @@ -11,34 +11,34 @@ module do_concurrent_m pure module subroutine do_concurrent_sampled_speeds(speeds, vel, cumulative_distribution, sampled_speeds) bind(C) implicit none real(c_double), intent(in) :: speeds(:,:), vel(:), cumulative_distribution(:) - real(c_double), intent(out), allocatable :: sampled_speeds(:,:) + real(c_double), intent(out) :: sampled_speeds(:,:) end subroutine pure module subroutine do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) bind(C) implicit none integer(c_int), intent(in) :: nsteps real(c_double), intent(in) :: dir(:,:,:), sampled_speeds(:,:) - real(c_double), intent(out), allocatable :: my_velocities(:,:,:) + real(c_double), intent(out) :: my_velocities(:,:,:) end subroutine pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) implicit none real(c_double), intent(in) :: speeds(:), vel(:) - integer(c_int), intent(out), allocatable :: k(:) + integer(c_int), intent(out) :: k(:) end subroutine - pure module subroutine & - do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) bind(C) + pure module subroutine do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) & + bind(C) implicit none - integer(c_int), intent(in) :: nintervals, speed, freq, k(:) + integer(c_int), intent(in) :: speed, freq, k(:) real(c_double), intent(in) :: emp_distribution(:,:) - real(c_double), intent(out), allocatable :: output_distribution(:,:) + real(c_double), intent(out) :: output_distribution(:,:) end subroutine module subroutine do_concurrent_speeds(history, speeds) bind(C) implicit none type(t_cell_collection_bind_C_t), intent(in) :: history(:) - real(c_double), intent(out), allocatable :: speeds(:) + real(c_double), intent(out) :: speeds(:) end subroutine end interface diff --git a/src/matcha/do_concurrent_s.f90 b/src/matcha/do_concurrent_s.f90 index 80c37e7..c4798db 100644 --- a/src/matcha/do_concurrent_s.f90 +++ b/src/matcha/do_concurrent_s.f90 @@ -1,15 +1,18 @@ submodule(do_concurrent_m) do_concurrent_s + use assert_m, only : assert use iso_c_binding, only : c_f_pointer implicit none contains - module procedure do_concurrent_sampled_speeds - + pure module subroutine do_concurrent_sampled_speeds(speeds, vel, cumulative_distribution, sampled_speeds) bind(C) + real(c_double), intent(in) :: speeds(:,:), vel(:), cumulative_distribution(:) + real(c_double), intent(out) :: sampled_speeds(:,:) integer cell, step + + call assert(all(shape(sampled_speeds)==shape(speeds)), "do_concurrent_sampled_speeds: {sampled_,}speeds shape match") associate(ncells => size(speeds,1), nsteps => size(speeds,2)) - allocate(sampled_speeds(ncells,nsteps)) do concurrent(cell = 1:ncells, step = 1:nsteps) associate(k => findloc(speeds(cell,step) >= cumulative_distribution, value=.false., dim=1)-1) sampled_speeds(cell,step) = vel(k) @@ -17,52 +20,53 @@ end do end associate - end procedure - - module procedure do_concurrent_my_velocities - + end subroutine + + pure module subroutine do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) bind(C) + integer(c_int), intent(in) :: nsteps + real(c_double), intent(in) :: dir(:,:,:), sampled_speeds(:,:) + real(c_double), intent(out) :: my_velocities(:,:,:) integer step - if(allocated(my_velocities)) deallocate(my_velocities) - allocate(my_velocities, mold=dir) - + call assert(all([size(my_velocities,1),size(sampled_speeds,2)] == shape(sampled_speeds)), & + "do_concurrent_my_velocities: argument size match") + call assert(all(size(my_velocities,1)==shape(dir)), "do_concurrent_my_velocities: argument shape match") + do concurrent(step=1:nsteps) my_velocities(:,step,1) = sampled_speeds(:,step)*dir(:,step,1) my_velocities(:,step,2) = sampled_speeds(:,step)*dir(:,step,2) my_velocities(:,step,3) = sampled_speeds(:,step)*dir(:,step,3) end do - - end procedure - - module procedure do_concurrent_k + end subroutine - integer i + pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) + real(c_double), intent(in) :: speeds(:), vel(:) + integer(c_int), intent(out) :: k(:) + integer i associate(nspeeds => size(speeds)) - if(allocated(k)) deallocate(k) - allocate(k(nspeeds)) do concurrent(i = 1:nspeeds) k(i) = findloc(speeds(i) >= vel, value=.false., dim=1)-1 end do end associate - end procedure - - module procedure do_concurrent_output_distribution + end subroutine + pure module subroutine do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) bind(C) + integer(c_int), intent(in) :: speed, freq, k(:) + real(c_double), intent(in) :: emp_distribution(:,:) + real(c_double), intent(out) :: output_distribution(:,:) integer i - if(allocated(output_distribution)) deallocate(output_distribution) - allocate(output_distribution(nintervals,2)) output_distribution(:,freq) = 0.d0 output_distribution(:,speed) = emp_distribution(:,speed) do concurrent(i = 1:size(output_distribution,1)) output_distribution(i,freq) = count(k==i) end do - - end procedure - - module procedure do_concurrent_speeds + end subroutine + module subroutine do_concurrent_speeds(history, speeds) bind(C) + type(t_cell_collection_bind_C_t), intent(in) :: history(:) + real(c_double), intent(out) :: speeds(:) integer i, j, k integer, parameter :: nspacedims=3 @@ -78,19 +82,16 @@ x(i,:,:) = positions end do - associate(t => history%time) - allocate(speeds(ncells*(npositions-1))) - do concurrent(i = 1:npositions-1, j = 1:ncells) - associate( & - u => (x(i+1,j,:) - x(i,j,:))/(t(i+1) - t(i)), & - ij => i + (j-1)*(npositions-1) & - ) - speeds(ij) = sqrt(sum([(u(k)**2, k=1,nspacedims)])) - end associate - end do - end associate + do concurrent(i = 1:npositions-1, j = 1:ncells) + associate( & + u => (x(i+1,j,:) - x(i,j,:))/(history(i+1)%time - history(i)%time), & + ij => i + (j-1)*(npositions-1) & + ) + speeds(ij) = sqrt(sum([(u(k)**2, k=1,nspacedims)])) + end associate + end do end associate - end procedure + end subroutine -end submodule do_concurrent_s +end submodule do_concurrent_s \ No newline at end of file diff --git a/src/matcha/output_s.f90 b/src/matcha/output_s.f90 index 1bac989..ec621bf 100644 --- a/src/matcha/output_s.f90 +++ b/src/matcha/output_s.f90 @@ -3,7 +3,7 @@ submodule(output_m) output_s use do_concurrent_m, only : do_concurrent_k, do_concurrent_output_distribution, do_concurrent_speeds use t_cell_collection_m, only : t_cell_collection_bind_C_t - use iso_c_binding, only : c_loc, c_double + use iso_c_binding, only : c_double implicit none contains @@ -24,16 +24,26 @@ integer, parameter :: speed=1, freq=2 ! subscripts for speeds and frequencies + associate(npositions => size(self%history_)) + allocate(speeds(self%my_num_cells()*(npositions-1))) + end associate call do_concurrent_speeds(t_cell_collection_bind_C_t(self%history_), speeds) - associate(emp_distribution => self%input_%sample_distribution()) + block + real(c_double), allocatable :: emp_distribution(:,:) + + emp_distribution = self%input_%sample_distribution() associate(nintervals => size(emp_distribution(:,1)), dvel_half => (emp_distribution(2,speed)-emp_distribution(1,speed))/2.d0) vel = [emp_distribution(1,speed) - dvel_half, [(emp_distribution(i,speed) + dvel_half, i=1,nintervals)]] + if (allocated(k)) deallocate(k) + allocate(k(size(speeds))) call do_concurrent_k(speeds, vel, k) - call do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) + if(allocated(output_distribution)) deallocate(output_distribution) + allocate(output_distribution(nintervals,2)) + call do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) output_distribution(:,freq) = output_distribution(:,freq)/sum(output_distribution(:,freq)) end associate - end associate + end block end procedure diff --git a/src/matcha/subdomain_m.f90 b/src/matcha/subdomain_m.f90 index 72c9cbf..f2a22d0 100644 --- a/src/matcha/subdomain_m.f90 +++ b/src/matcha/subdomain_m.f90 @@ -3,29 +3,35 @@ module subdomain_m private public :: subdomain_t + public :: march type subdomain_t private real, allocatable :: s_(:,:,:) contains procedure, pass(self) :: define - procedure, pass(self) :: step - procedure, pass(rhs) :: multiply - generic :: operator(.laplacian.) => laplacian - generic :: operator(*) => multiply - generic :: operator(+) => add - generic :: assignment(=) => assign_and_sync procedure dx procedure dy procedure dz procedure values + generic :: operator(*) => multiply + generic :: operator(+) => add + generic :: operator(.laplacian.) => laplacian + generic :: assignment(=) => assign_ + procedure, private, pass(rhs) :: multiply procedure, private :: laplacian procedure, private :: add - procedure, private :: assign_and_sync + procedure, private :: assign_ end type interface + pure module function laplacian(rhs) result(laplacian_rhs) + implicit none + class(subdomain_t), intent(in) :: rhs[*] + type(subdomain_t) laplacian_rhs + end function + module subroutine define(side, boundary_val, internal_val, n, self) implicit none real, intent(in) :: side, boundary_val, internal_val @@ -33,10 +39,10 @@ module subroutine define(side, boundary_val, internal_val, n, self) class(subdomain_t), intent(out) :: self end subroutine - module subroutine step(alpha_dt, self) + module subroutine march(alpha_dt, self) implicit none real, intent(in) :: alpha_dt - class(subdomain_t), intent(inout) :: self + type(subdomain_t), intent(inout) :: self[*] end subroutine pure module function values(self) result(my_values) @@ -63,12 +69,6 @@ pure module function dz(self) result(my_dz) real my_dz end function - pure module function laplacian(rhs) result(laplacian_rhs) - implicit none - class(subdomain_t), intent(in) :: rhs - type(subdomain_t) laplacian_rhs - end function - pure module function multiply(lhs, rhs) result(product) implicit none class(subdomain_t), intent(in) :: rhs @@ -83,7 +83,7 @@ pure module function add(lhs, rhs) result(total) type(subdomain_t) total end function - module subroutine assign_and_sync(lhs, rhs) + module subroutine assign_(lhs, rhs) implicit none class(subdomain_t), intent(out) :: lhs type(subdomain_t), intent(in) :: rhs diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index 2ca5eb9..f5bfa0c 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -1,14 +1,9 @@ submodule(subdomain_m) subdomain_s + use assert_m, only : assert, intrinsic_array_t use sourcery_m, only : data_partition_t - use assert_m, only : assert - use intrinsic_array_m, only : intrinsic_array_t implicit none - real, allocatable :: halo_x(:,:,:)[:] - integer, parameter :: west=1, east=2 - type(data_partition_t) data_partition - real dx_, dy_, dz_ integer my_nx, nx, ny, nz, me, num_subdomains, my_internal_west, my_internal_east real, allocatable :: increment(:,:,:) @@ -50,12 +45,6 @@ if (me == 1) self%s_(1 , :, :) = boundary_val ! minimum x boundary if (me == num_subdomains) self%s_(my_nx, :, :) = boundary_val ! maximum x boundary - - if (allocated(halo_x)) deallocate(halo_x) - allocate(halo_x(west:east, ny, nz)[*]) - if (me>1) halo_x(east,:,:)[me-1] = self%s_(1,:,:) - if (me0,"laplacian: easternmost subdomain too small") - do concurrent(j=2:ny-1, k=2:nz-1) - laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + halo_east(j ,k ))/dx_**2 + & - (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & - (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 - end do - - laplacian_rhs%s_(:, 1,:) = 0. - laplacian_rhs%s_(:,ny,:) = 0. - laplacian_rhs%s_(:,:, 1) = 0. - laplacian_rhs%s_(:,:,nz) = 0. - if (me==1) laplacian_rhs%s_(1,:,:) = 0. - if (me==num_subdomains) laplacian_rhs%s_(my_nx,:,:) = 0. - + module procedure add + call assert(allocated(lhs%s_) .and. allocated(rhs%s_), "subdomain_t%add: allocated(rhs%s_)") + total%s_ = lhs%s_ + rhs%s_ end procedure module procedure multiply call assert(allocated(rhs%s_), "subdomain_t%multiply: allocated(rhs%s_)") - product%s_ = lhs * rhs%s_ + product%s_ = lhs + rhs%s_ end procedure - module procedure add - call assert(allocated(rhs%s_), "subdomain_t%add: allocated(rhs%s_)") - total%s_ = lhs%s_ + rhs%s_ - end procedure - - module procedure assign_and_sync - call assert(allocated(rhs%s_), "subdomain_t%assign_and_sync: allocated(rhs%s_)") - sync all + module procedure assign_ + call assert(allocated(rhs%s_), "subdomain_t%assign_: allocated(rhs%s_)") lhs%s_ = rhs%s_ - if (me>1) halo_x(east,:,:)[me-1] = rhs%s_(1,:,:) - if (me0,"laplacian: easternmost subdomain too small") if (.not. allocated(increment)) allocate(increment(my_nx,ny,nz)) + sync all call internal_points(increment) - call edge_points(increment) + call edge_points(self, increment) call apply_boundary_condition(increment) - - sync all self%s_ = self%s_ + increment - sync all - call exchange_halo(self%s_) contains @@ -170,14 +108,17 @@ subroutine internal_points(ds) end do end subroutine - subroutine edge_points(ds) + subroutine edge_points(self, ds) + type(subdomain_t), intent(in) :: self[*] real, intent(inout) :: ds(:,:,:) real, allocatable :: halo_west(:,:), halo_east(:,:) integer i, j, k - halo_west = merge(halo_x(west,:,:), self%s_(1, :,:), me/=1) - halo_east = merge(halo_x(east,:,:), self%s_(my_nx,:,:), me/=num_subdomains) - + if (me==1) then + halo_west = self%s_(1,:,:) + else + halo_west = self[me-1]%s_(ubound(self[me-1]%s_,1),:,:) + end if i = my_internal_west do concurrent(j=2:ny-1,k=2:nz-1) ds(i,j,k) = alpha_dt*( & @@ -187,6 +128,11 @@ subroutine edge_points(ds) ) end do + if (me==1) then + halo_east = self%s_(my_nx,:,:) + else + halo_east = self[me+1]%s_(1,:,:) + end if i = my_internal_east do concurrent(j=2:ny-1, k=2:nz-1) ds(i,j,k) = alpha_dt*( & @@ -199,7 +145,6 @@ subroutine edge_points(ds) subroutine apply_boundary_condition(ds) real, intent(inout) :: ds(:,:,:) - integer i, j ds(:,1:ny:ny-1, : ) = 0. ds(:, : ,1:nz:nz-1) = 0. @@ -207,12 +152,58 @@ subroutine apply_boundary_condition(ds) if (me==num_subdomains) ds(my_nx,:,:) = 0. end subroutine - subroutine exchange_halo(s) - real, intent(in) :: s(:,:,:) - if (me>1) halo_x(east,:,:)[me-1] = s(1,:,:) - if (me0,"laplacian: easternmost subdomain too small") + do concurrent(j=2:ny-1, k=2:nz-1) + laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + halo_east(j ,k ))/dx_**2 + & + (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & + (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 + end do + + laplacian_rhs%s_(:, 1,:) = 0. + laplacian_rhs%s_(:,ny,:) = 0. + laplacian_rhs%s_(:,:, 1) = 0. + laplacian_rhs%s_(:,:,nz) = 0. + if (me==1) laplacian_rhs%s_(1,:,:) = 0. + if (me==num_subdomains) laplacian_rhs%s_(my_nx,:,:) = 0. + + end function + +end submodule subdomain_s diff --git a/src/matcha/t_cell_collection_m.f90 b/src/matcha/t_cell_collection_m.f90 index 323d47b..03789ac 100644 --- a/src/matcha/t_cell_collection_m.f90 +++ b/src/matcha/t_cell_collection_m.f90 @@ -42,8 +42,9 @@ pure module function construct(positions, time) result(t_cell_collection) interface t_cell_collection_bind_C_t - elemental module function construct_bind_C(t_cell_collection) result(t_cell_collection_bind_C) + impure elemental module function construct_bind_C(t_cell_collection) result(t_cell_collection_bind_C) !! Result is bind(C) representation of the data inside a t_cell_collection_t object + !! This function is impure because it invokes c_loc. Fortran 2023 compliance will allow this function to be pure. implicit none type(t_cell_collection_t), intent(in), target :: t_cell_collection type(t_cell_collection_bind_C_t) t_cell_collection_bind_C @@ -60,7 +61,6 @@ pure module function positions(self) result(my_positions) double precision, allocatable :: my_positions(:,:) end function - elemental module function time(self) result(my_time) !! Return the t_cell_collection_t object's time stamp implicit none diff --git a/src/matcha/t_cell_collection_s.F90 b/src/matcha/t_cell_collection_s.f90 similarity index 100% rename from src/matcha/t_cell_collection_s.F90 rename to src/matcha/t_cell_collection_s.f90 diff --git a/src/matcha_s.F90 b/src/matcha_s.F90 index 4735efc..0cc9081 100644 --- a/src/matcha_s.F90 +++ b/src/matcha_s.F90 @@ -32,7 +32,9 @@ associate(me => this_image()) associate(my_num_cells => data_partition%last(me) - data_partition%first(me) + 1) +#ifndef NAGFOR call random_init(repeatable=.true., image_distinct=.true.) +#endif allocate(random_positions(my_num_cells,ndim)) call random_number(random_positions) diff --git a/test/matcha_test_m.f90 b/test/matcha_test_m.f90 index f75e164..20ffe4c 100644 --- a/test/matcha_test_m.f90 +++ b/test/matcha_test_m.f90 @@ -67,7 +67,7 @@ function compare_image_distributions() result(test_passes) function compare_global_distributions() result(test_passes) logical test_passes type(output_t) output - double precision, allocatable :: simulated_distribution(:,:) + double precision, allocatable :: simulated_distribution(:,:), frequency_distribution(:) integer num_cells integer, parameter :: speed=1, freq=2 ! subscripts for speeds and frequencies real, parameter :: tolerance = 1.D-02 @@ -77,16 +77,16 @@ function compare_global_distributions() result(test_passes) associate(empirical_distribution => input%sample_distribution()) simulated_distribution = output%simulated_distribution() num_cells = output%my_num_cells() - simulated_distribution(:,freq) = num_cells*simulated_distribution(:,freq) - call co_sum(simulated_distribution(:,freq), result_image=1) + frequency_distribution = num_cells*simulated_distribution(:,freq) ! copy to work around nagfor 7.1 Build 7145 compiler bug + call co_sum(frequency_distribution, result_image=1) call co_sum(num_cells, result_image=1) if (this_image()/=1) then test_passes = .true. else - simulated_distribution(:,freq) = simulated_distribution(:,freq)/dble(num_cells) + frequency_distribution = frequency_distribution/dble(num_cells) associate( & diffmax_speeds=> maxval(abs(empirical_distribution(:,speed)-simulated_distribution(:,speed))), & - diffmax_freqs => maxval(abs(empirical_distribution(:,freq)-simulated_distribution(:,freq))) & + diffmax_freqs => maxval(abs(empirical_distribution(:,freq)-frequency_distribution)) & ) test_passes = (diffmax_freqs < tolerance) .and. (diffmax_speeds < tolerance) end associate diff --git a/test/subdomain_test_m.f90 b/test/subdomain_test_m.f90 index 144ab6e..5b506e3 100644 --- a/test/subdomain_test_m.f90 +++ b/test/subdomain_test_m.f90 @@ -3,7 +3,7 @@ module subdomain_test_m !! Define subdomain tests and procedures required for reporting results use sourcery_m, only : test_t, test_result_t - use subdomain_m, only : subdomain_t + use subdomain_m, only : subdomain_t, march use assert_m, only : assert implicit none @@ -53,7 +53,8 @@ subroutine output(v) critical do j = 1, size(v,2) do k = 1, size(v,3) - print *,"image ",this_image(),": ",j,k,v(:,j,k) + !print *,"image ",this_image(),": ",j,k,v(:,j,k) + print *,j,k,v(:,j,k) end do end do end critical @@ -62,10 +63,12 @@ subroutine output(v) function concave_laplacian() result(test_passes) logical test_passes - type(subdomain_t) f, laplacian_f + type(subdomain_t), save :: f[*] + type(subdomain_t) :: laplacian_f real, allocatable :: lap_f_vals(:,:,:) - call f%define(side=1., boundary_val=1., internal_val=2., n=21) ! internally constant subdomain with a step down at all surfaces + call f%define(side=1., boundary_val=1., internal_val=2., n=32) ! internally constant subdomain with a step down at all surfaces + sync all laplacian_f = .laplacian. f lap_f_vals = laplacian_f%values() @@ -153,15 +156,16 @@ function concave_laplacian() result(test_passes) function correct_steady_state() result(test_passes) logical test_passes - type(subdomain_t) T - real, parameter :: T_boundary = 1., T_initial = 2., tolerance = 0.01, T_steady = T_boundary, alpha = 1. - integer, parameter :: steps = 6000 + type(subdomain_t), save :: T[*] + real, parameter :: T_boundary = 1., T_initial = 2., tolerance = 5.E-03, T_steady = T_boundary, alpha = 1. + integer, parameter :: steps = 25000 integer step - call T%define(side=1., boundary_val=T_boundary, internal_val=T_initial, n=21) ! const. internally with a step down at boundaries + call T%define(side=1., boundary_val=T_boundary, internal_val=T_initial, n=32) ! const. internally with a step down at boundaries associate(dt => T%dx()*T%dy()*T%dz()/(4*alpha)) do step = 1, steps + sync all T = T + dt * alpha * .laplacian. T end do end associate @@ -174,7 +178,7 @@ function correct_steady_state() result(test_passes) function functional_matches_procedural() result(test_passes) logical test_passes real, parameter :: tolerance = 1.E-06 - integer, parameter :: steps = 6000, n=21 + integer, parameter :: steps = 6000, n=32 real, parameter :: alpha = 1. real, parameter :: side=1., boundary_val=1., internal_val=2. @@ -188,13 +192,14 @@ function functional_matches_procedural() result(test_passes) function T_functional() real, allocatable :: T_functional(:,:,:) - type(subdomain_t) T + type(subdomain_t), save :: T[*] integer step call T%define(side, boundary_val, internal_val, n) associate(dt => T%dx()*T%dy()/(4*alpha)) do step = 1, steps + sync all T = T + dt * alpha * .laplacian. T end do end associate @@ -204,14 +209,15 @@ function T_functional() function T_procedural() real, allocatable :: T_procedural(:,:,:) - type(subdomain_t) T + type(subdomain_t), save :: T[*] integer step call T%define(side, boundary_val, internal_val, n) associate(dt => T%dx()*T%dy()/(4*alpha)) do step = 1, steps - call T%step(alpha*dt) + sync all + call march(alpha*dt, T) end do end associate