diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 725a16e86..c64e74a5d 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -2462,9 +2462,6 @@ function plot_1d(xcoord, data; ax=nothing, xlabel=nothing, ylabel=nothing, title if title !== nothing ax.title = title end - if yscale !== nothing - ax.yscale = yscale - end if transform !== identity # Use transform to allow user to do something like data = abs.(data) @@ -2476,6 +2473,10 @@ function plot_1d(xcoord, data; ax=nothing, xlabel=nothing, ylabel=nothing, title l = lines!(ax, xcoord, data; kwargs...) + if yscale !== nothing + ax.yscale = yscale + end + if fig === nothing return l else @@ -4062,8 +4063,9 @@ end function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutral=false, is=1, iz=nothing, fig=nothing, ax=nothing, - frame_index=nothing, outfile=nothing, transform=identity, - axis_args=Dict{Symbol,Any}(), kwargs...) + frame_index=nothing, outfile=nothing, yscale=nothing, + transform=identity, axis_args=Dict{Symbol,Any}(), + kwargs...) if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4141,20 +4143,27 @@ function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, this_f_unnorm = get_this_f_unnorm(it) - this_fmin, this_fmax = NaNMath.extrema(transform(this_f_unnorm)) + this_fmin, this_fmax = NaNMath.extrema(transform.(this_f_unnorm)) fmin = min(fmin, this_fmin) fmax = max(fmax, this_fmax) end yheight = fmax - fmin xwidth = dzdtmax - dzdtmin - limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth, - fmin - 0.01*yheight, fmax + 0.01*yheight) + if yscale ∈ (log, log10) + # Need to calculate y offsets differently to non-logarithmic y-axis case, to + # ensure ymin is not negative. 
+ limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth, + fmin * (fmin/fmax)^0.01, fmax * (fmax/fmin)^0.01) + else + limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth, + fmin - 0.01*yheight, fmax + 0.01*yheight) + end dzdt = @lift vpagrid_to_dzdt(run_info.vpa.grid, vth[$frame_index], upar[$frame_index], run_info.evolve_ppar, run_info.evolve_upar) f_unnorm = @lift transform.(get_this_f_unnorm($frame_index)) - l = plot_1d(dzdt, f_unnorm; ax=ax, label=run_info.run_name, kwargs...) + l = plot_1d(dzdt, f_unnorm; ax=ax, label=run_info.run_name, yscale=yscale, kwargs...) if outfile !== nothing if fig === nothing @@ -4403,18 +4412,80 @@ function plot_ion_pdf_2D_at_wall(run_info; plot_prefix) && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar) for ri ∈ run_info) - for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+8, "wall-"), - (z_upper, z_upper-8:z_upper, "wall+")) + nt = minimum(ri.nt for ri ∈ run_info) + + for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"), + (z_upper, z_upper-4:z_upper, "wall+")) f_input = copy(input_dict_dfns["f"]) f_input["iz0"] = z if input.plot - plot_vs_vpa(run_info, "f"; is=1, input=f_input, - outfile=plot_prefix * "pdf_$(label)_vs_vpa.pdf") + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_$(label)_vs_vpa.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * 
"logpdf_$(label)_vs_vpa.pdf" + save(outfile, fig) if moment_kinetic - plot_f_unnorm_vs_vpa(run_info; input=f_input, is=1, - outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa.pdf") + fig, ax = get_1d_ax(; xlabel="vpa_unnorm", ylabel="f_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; input=f_input, is=1, iz=iz, + label="$(run_label)iz=$iz", ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vpa_unnorm", ylabel="f_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; input=f_input, is=1, iz=iz, + label="$(run_label)iz=$iz", ax=ax, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) end plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=true, input=f_input, is=1, @@ -4442,12 +4513,80 @@ function plot_ion_pdf_2D_at_wall(run_info; plot_prefix) end if input.animate - animate_vs_vpa(run_info, "f"; is=1, input=f_input, - outfile=plot_prefix * "pdf_$(label)_vs_vpa." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_$(label)_vs_vpa." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f", yscale=log10) + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_$(label)_vs_vpa." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) if moment_kinetic - animate_f_unnorm_vs_vpa(run_info; input=f_input, is=1, - outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) end animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=true, input=f_input, is=1, @@ -4528,19 +4667,82 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) moment_kinetic = any(ri !== nothing && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar) for ri ∈ run_info) + nt = minimum(ri.nt for ri ∈ run_info) - for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+8, "wall-"), - (z_upper, z_upper-8:z_upper, "wall+")) + for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"), + (z_upper, z_upper-4:z_upper, "wall+")) f_neutral_input = copy(input_dict_dfns["f_neutral"]) f_neutral_input["iz0"] = z if input.plot - plot_vs_vz(run_info, "f_neutral"; is=1, input=f_neutral_input, - outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz.pdf") + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vpa.pdf" + save(outfile, fig) if moment_kinetic - plot_f_unnorm_vs_vpa(run_info; input=f_neutral_input, neutral=true, is=1, - outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vpa.pdf") + fig, ax = get_1d_ax(; xlabel="vz_unnorm", ylabel="f_neutral_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 
+ run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; neutral=true, input=f_neutral_input, + is=1, iz=iz, label="$(run_label)iz=$iz", + ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vz_unnorm", ylabel="f_neutral_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; neutral=true, input=f_neutral_input, + is=1, iz=iz, label="$(run_label)iz=$iz", + ax=ax, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) end if !is_1V @@ -4583,12 +4785,81 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) end if input.animate - animate_vs_vz(run_info, "f_neutral"; is=1, input=f_neutral_input, - outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral", yscale=log10) + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vz." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) if moment_kinetic - animate_f_unnorm_vs_vpa(run_info; input=f_neutral_input, neutral=true, is=1, - outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; neutral=true, is=1, iz=iz, + input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; neutral=true, is=1, iz=iz, + input=f_neutral_input, label="$(run_label)iz=$iz", + ax=ax, frame_index=frame_index, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vz." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) end if !is_1V @@ -4755,35 +5026,35 @@ function constraints_plots(run_info; plot_prefix=plot_prefix) end # Electrons - if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) - - fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") - for ri ∈ run_info - if length(run_info) > 1 - prefix = ri.run_name * ", " - else - prefix = "" - end - - varname = "electron_constraints_A_coefficient" - label = prefix * "(A-1)" - data = get_variable(ri, varname; it=it0, ir=ir0) - data .-= 1.0 - plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input) - - varname = "electron_constraints_B_coefficient" - label = prefix * "B" - plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, - input=input) - - varname = "electron_constraints_C_coefficient" - label = prefix * "C" - plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, - input=input) - end - put_legend_right(fig, ax) - save(plot_prefix * "electron_constraints.pdf", fig) - end + #if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) + + # fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") + # for ri ∈ run_info + # if length(run_info) > 1 + # prefix = ri.run_name * ", " + # else + # prefix = "" + # end + + # varname = "electron_constraints_A_coefficient" + # label = prefix * "(A-1)" + # data = get_variable(ri, varname; it=it0, ir=ir0) + # data .-= 1.0 + # plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input) + + # varname = "electron_constraints_B_coefficient" + # label = prefix * "B" + # plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, + # input=input) + + # varname = "electron_constraints_C_coefficient" + # label = prefix * "C" + # plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, + # input=input) + # end + # put_legend_right(fig, ax) + # save(plot_prefix * "electron_constraints.pdf", fig) + #end end if input.animate @@ -4917,53 +5188,53 
@@ function constraints_plots(run_info; plot_prefix=plot_prefix) end # Electrons - if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) - - frame_index = Observable(1) - fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") - - # Calculate plot limits manually so we can exclude the first time point, which - # often has a large value for (A-1) due to the way initialisation is done, - # which can make the subsequent values hard to see. - ymin = Inf - ymax = -Inf - for ri ∈ run_info - if length(run_info) > 1 - prefix = ri.run_name * ", " - else - prefix = "" - end - - varname = "electron_constraints_A_coefficient" - label = prefix * "(A-1)" - data = get_variable(ri, varname; ir=ir0) - data .-= 1.0 - ymin = min(ymin, minimum(data[:,2:end])) - ymax = max(ymax, maximum(data[:,2:end])) - animate_vs_z(ri, varname; label=label, data=data, - frame_index=frame_index, ax=ax, input=input) - - varname = "electron_constraints_B_coefficient" - label = prefix * "B" - data = get_variable(ri, varname; ir=ir0) - ymin = min(ymin, minimum(data[:,2:end])) - ymax = max(ymax, maximum(data[:,2:end])) - animate_vs_z(ri, varname; label=label, data=data, - frame_index=frame_index, ax=ax, ir=ir0, input=input) - - varname = "electron_constraints_C_coefficient" - label = prefix * "C" - data = get_variable(ri, varname; ir=ir0) - ymin = min(ymin, minimum(data[:,2:end])) - ymax = max(ymax, maximum(data[:,2:end])) - animate_vs_z(ri, varname; label=label, data=data, - frame_index=frame_index, ax=ax, ir=ir0, input=input) - end - put_legend_right(fig, ax) - ylims!(ax, ymin, ymax) - save_animation(fig, frame_index, nt, - plot_prefix * "electron_constraints." 
* input.animation_ext) - end + #if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) + + # frame_index = Observable(1) + # fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") + + # # Calculate plot limits manually so we can exclude the first time point, which + # # often has a large value for (A-1) due to the way initialisation is done, + # # which can make the subsequent values hard to see. + # ymin = Inf + # ymax = -Inf + # for ri ∈ run_info + # if length(run_info) > 1 + # prefix = ri.run_name * ", " + # else + # prefix = "" + # end + + # varname = "electron_constraints_A_coefficient" + # label = prefix * "(A-1)" + # data = get_variable(ri, varname; ir=ir0) + # data .-= 1.0 + # ymin = min(ymin, minimum(data[:,2:end])) + # ymax = max(ymax, maximum(data[:,2:end])) + # animate_vs_z(ri, varname; label=label, data=data, + # frame_index=frame_index, ax=ax, input=input) + + # varname = "electron_constraints_B_coefficient" + # label = prefix * "B" + # data = get_variable(ri, varname; ir=ir0) + # ymin = min(ymin, minimum(data[:,2:end])) + # ymax = max(ymax, maximum(data[:,2:end])) + # animate_vs_z(ri, varname; label=label, data=data, + # frame_index=frame_index, ax=ax, ir=ir0, input=input) + + # varname = "electron_constraints_C_coefficient" + # label = prefix * "C" + # data = get_variable(ri, varname; ir=ir0) + # ymin = min(ymin, minimum(data[:,2:end])) + # ymax = max(ymax, maximum(data[:,2:end])) + # animate_vs_z(ri, varname; label=label, data=data, + # frame_index=frame_index, ax=ax, ir=ir0, input=input) + # end + # put_legend_right(fig, ax) + # ylims!(ax, ymin, ymax) + # save_animation(fig, frame_index, nt, + # plot_prefix * "electron_constraints." * input.animation_ext) + #end end catch e println("Error in constraints_plots(). 
Error was ", e) @@ -6893,9 +7164,9 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) input = Dict_to_NamedTuple(input_dict["timestep_diagnostics"]) - steps_fig = nothing - dt_fig = nothing - CFL_fig = nothing + steps_fig = nothing + dt_fig = nothing + CFL_fig = nothing if input.plot # Plot numbers of steps and numbers of failures @@ -6916,13 +7187,19 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) prefix = ri.run_name * " " end - plot_1d(ri.time, get_variable(ri, "steps_per_output"; it=it); + if it !== nothing + time = ri.time[it] + else + time = ri.time + end + + plot_1d(time, get_variable(ri, "steps_per_output"; it=it); label=prefix * "steps", ax=ax) # Fudge to create an invisible line on ax_failures that cycles the line colors # and adds a label for "steps_per_output" to the plot because we create the # legend from ax_failures. - plot_1d([ri.time[1]], [0]; label=prefix * "steps", ax=ax_failures) - plot_1d(ri.time, get_variable(ri, "failures_per_output"; it=it); + plot_1d([time[1]], [0]; label=prefix * "steps", ax=ax_failures) + plot_1d(time, get_variable(ri, "failures_per_output"; it=it); label=prefix * "failures", ax=ax_failures) failure_caused_by_per_output = get_variable(ri, @@ -6931,55 +7208,63 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) counter = 0 # Ion pdf failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; label=prefix * "failures caused by f_ion", ax=ax_failures) if ri.evolve_density # Ion density failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by n_ion", ax=ax_failures) end if ri.evolve_upar # Ion flow failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + 
plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by u_ion", ax=ax_failures) end if ri.evolve_ppar # Ion flow failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by p_ion", ax=ax_failures) end if ri.n_neutral_species > 0 # Neutral pdf failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; label=prefix * "failures caused by f_neutral", ax=ax_failures) if ri.evolve_density # Neutral density failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by n_neutral", ax=ax_failures) end if ri.evolve_upar # Neutral flow failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by u_neutral", ax=ax_failures) end if ri.evolve_ppar # Neutral flow failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by p_neutral", ax=ax_failures) end + if occursin("ARK", ri.t_input["type"]) + # Nonlinear iteration failed to converge in implicit part of + # timestep + counter += 1 + plot_1d(time, @view failure_caused_by_per_output[counter,:]; + linestyle=:dot, + label=prefix * "nonlinear iteration convergence failure", ax=ax_failures) + end end if counter > size(failure_caused_by_per_output, 1) @@ -7016,20 +7301,50 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) else prefix = ri.run_name * " " end - CFL_vars = 
["minimum_CFL_ion_z", "minimum_CFL_ion_vpa"] + if it !== nothing + time = ri.time[it] + else + time = ri.time + end + + CFL_vars = String[] + implicit_CFL_vars = String[] + + push!(CFL_vars, "minimum_CFL_ion_z") + if occursin("ARK", ri.t_input["type"]) && ri.t_input["implicit_ion_advance"] + push!(implicit_CFL_vars, "minimum_CFL_ion_z") + end + push!(CFL_vars, "minimum_CFL_ion_vpa") + if occursin("ARK", ri.t_input["type"]) && (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) + push!(implicit_CFL_vars, "minimum_CFL_ion_vpa") + end if ri.n_neutral_species > 0 push!(CFL_vars, "minimum_CFL_neutral_z", "minimum_CFL_neutral_vz") end for varname ∈ CFL_vars var = get_variable(ri, varname) - maxval = min(maxval, maximum(var)) - plot_1d(ri.time, var; ax=ax, label=prefix*varname) + maxval = NaNMath.min(maxval, NaNMath.maximum(var)) + if occursin("neutral", varname) + if varname ∈ implicit_CFL_vars + linestyle = :dashdot + else + linestyle = :dash + end + else + if varname ∈ implicit_CFL_vars + linestyle = :dot + else + linestyle = nothing + end + end + plot_1d(time, var; ax=ax, label=prefix*varname, linestyle=linestyle) end end - ylims!(ax, 0.0, 4.0 * maxval) + ylims!(ax, 0.0, 10.0 * maxval) put_legend_right(CFL_fig, ax) - limits_fig, ax = get_1d_ax(; xlabel="time", ylabel="number of limits per factor per output") + limits_fig, ax = get_1d_ax(; xlabel="time", ylabel="number of limits per factor per output", + size=(600, 500)) for ri ∈ run_info if length(run_info) == 1 @@ -7037,57 +7352,113 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) else prefix = ri.run_name * " " end + if it !== nothing + time = ri.time[it] + else + time = ri.time + end limit_caused_by_per_output = get_variable(ri, "limit_caused_by_per_output"; it=it) counter = 0 - # Accuracy limit counter - counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "RK accuracy", ax=ax) - # Maximum timestep increase limit counter 
counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep increase", ax=ax) # Slower maximum timestep increase near last failure limit counter counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep increase near last fail", ax=ax) # Minimum timestep limit counter counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "min timestep", ax=ax) # Maximum timestep limit counter counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep", ax=ax) - # Ion z advection + # High nonlinear iterations count counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion z advect", ax=ax) + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "high nl iterations", ax=ax) - # Ion vpa advection + # Accuracy limit counters counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion vpa advect", ax=ax) + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion pdf RK accuracy", ax=ax, linestyle=:dash) + if ri.evolve_density + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion density RK accuracy", ax=ax, + linestyle=:dash) + end + if ri.evolve_upar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion upar RK accuracy", ax=ax, + linestyle=:dash) + end + if ri.evolve_ppar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion ppar RK accuracy", ax=ax, + linestyle=:dash) + end + if ri.n_neutral_species > 0 + counter += 
1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral pdf RK accuracy", ax=ax, + linestyle=:dash) + if ri.evolve_density + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral density RK accuracy", ax=ax, + linestyle=:dash) + end + if ri.evolve_upar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral uz RK accuracy", ax=ax, + linestyle=:dash) + end + if ri.evolve_ppar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral pz RK accuracy", ax=ax, + linestyle=:dash) + end + end + + if !(occursin("ARK", ri.t_input["type"]) && ri.t_input["implicit_ion_advance"]) + # Ion z advection + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion z advect", ax=ax, linestyle=:dot) + end + + if !(occursin("ARK", ri.t_input["type"]) && (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"])) + # Ion vpa advection + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion vpa advect", ax=ax, linestyle=:dot) + end if ri.n_neutral_species > 0 # Ion z advection counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral z advect", ax=ax) + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral z advect", ax=ax, linestyle=:dot) # Ion vpa advection counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral vz advect", ax=ax) + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral vz advect", ax=ax, linestyle=:dot) end if counter > size(limit_caused_by_per_output, 1) @@ -7103,6 +7474,41 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) put_legend_right(limits_fig, ax) + # Plot nonlinear solver diagnostics (if any) + nl_solvers_fig, ax = 
get_1d_ax(; xlabel="time", ylabel="iterations per solve/nonlinear-iteration") + has_nl_solver = false + + for ri ∈ run_info + if length(run_info) == 1 + prefix = "" + else + prefix = ri.run_name * " " + end + if it !== nothing + time = ri.time[it] + else + time = ri.time + end + + nl_nonlinear_iterations_names = Tuple(v for v ∈ ri.variable_names + if occursin("_nonlinear_iterations", v)) + if nl_nonlinear_iterations_names != () + has_nl_solver = true + nl_prefixes = (split(v, "_nonlinear_iterations")[1] + for v ∈ nl_nonlinear_iterations_names) + for p ∈ nl_prefixes + nonlinear_iterations = get_variable(ri, "$(p)_nonlinear_iterations_per_solve") + linear_iterations = get_variable(ri, "$(p)_linear_iterations_per_nonlinear_iteration") + plot_1d(time, nonlinear_iterations, label=prefix * " " * p * " NL per solve", ax=ax) + plot_1d(time, linear_iterations, label=prefix * " " * p * " L per NL", ax=ax) + end + end + end + + if has_nl_solver + put_legend_right(nl_solvers_fig, ax) + end + if plot_prefix !== nothing outfile = plot_prefix * "timestep_diagnostics.pdf" @@ -7113,11 +7519,19 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) outfile = plot_prefix * "timestep_limits.pdf" save(outfile, limits_fig) + + if has_nl_solver + outfile = plot_prefix * "nonlinear_solver_iterations.pdf" + save(outfile, nl_solvers_fig) + end else display(steps_fig) display(dt_fig) display(CFL_fig) display(limits_fig) + if has_nl_solver + display(nl_solvers_fig) + end end end diff --git a/moment_kinetics/Project.toml b/moment_kinetics/Project.toml index 45d1af96d..8de860c52 100644 --- a/moment_kinetics/Project.toml +++ b/moment_kinetics/Project.toml @@ -17,6 +17,7 @@ LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882" LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" +MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" 
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" diff --git a/moment_kinetics/src/calculus.jl b/moment_kinetics/src/calculus.jl index fbff04a2f..d2c16a478 100644 --- a/moment_kinetics/src/calculus.jl +++ b/moment_kinetics/src/calculus.jl @@ -362,7 +362,10 @@ function assign_endpoint!(df1d::AbstractArray{mk_float,Ndims}, # test against coord name -- make sure to use exact string delimiters e.g. "x" not 'x' # test against Ndims (autodetermined) to choose which array slices to use in assigning endpoints #println("DEBUG MESSAGE: coord.name: ",coord.name," Ndims: ",Ndims," key: ",key) - if coord.name == "z" && Ndims==2 + if coord.name == "z" && Ndims==1 + df1d[j] = receive_buffer[] + #println("ASSIGNING DATA") + elseif coord.name == "z" && Ndims==2 df1d[j,:] .= receive_buffer[:] #println("ASSIGNING DATA") elseif coord.name == "z" && Ndims==3 @@ -374,6 +377,9 @@ function assign_endpoint!(df1d::AbstractArray{mk_float,Ndims}, elseif coord.name == "z" && Ndims==6 df1d[:,:,:,j,:,:] .= receive_buffer[:,:,:,:,:] #println("ASSIGNING DATA") + elseif coord.name == "r" && Ndims==1 + df1d[j] = receive_buffer[] + #println("ASSIGNING DATA") elseif coord.name == "r" && Ndims==2 df1d[:,j] .= receive_buffer[:] #println("ASSIGNING DATA") diff --git a/moment_kinetics/src/charge_exchange.jl b/moment_kinetics/src/charge_exchange.jl index 66c1bb7fa..e70782c8c 100644 --- a/moment_kinetics/src/charge_exchange.jl +++ b/moment_kinetics/src/charge_exchange.jl @@ -9,12 +9,12 @@ using ..looping using ..interpolation: interpolate_to_grid_vpa! 
""" -update the evolved pdf for each ion and electron species to account for -charge exchange collisions between ions and neutrals +update the evolved pdf for each ion species to account for charge exchange collisions +between ions and neutrals """ -function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments, - composition, vpa, vz, charge_exchange_frequency, - vpa_spectral, vz_spectral, dt) +function ion_charge_exchange_collisions_1V!(f_out, fvec_in, moments, composition, vpa, vz, + charge_exchange_frequency, vpa_spectral, + vz_spectral, dt) # This routine assumes a 1D model with: # nvz = nvpa and identical vz and vpa grids @@ -32,19 +32,6 @@ function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments, moments.neutral.vth[:,:,is], moments, vpa, vz, charge_exchange_frequency, vz_spectral, dt) end - - begin_sn_r_z_region(no_synchronize=true) - @loop_sn isn begin - # apply CX collisions to all neutral species - # for each neutral species, obtain affect of charge exchange collisions - # with the corresponding ion species - @views charge_exchange_collisions_single_species!( - f_neutral_out[:,1,1,:,:,isn], fvec_in.pdf_neutral[:,1,1,:,:,isn], - fvec_in.pdf[:,1,:,:,isn], fvec_in.density[:,:,isn], - fvec_in.uz_neutral[:,:,isn], fvec_in.upar[:,:,isn], - moments.neutral.vth[:,:,isn], moments.ion.vth[:,:,isn], moments, - vz, vpa, charge_exchange_frequency, vpa_spectral, dt) - end else begin_s_r_z_region() @loop_s is begin @@ -58,8 +45,35 @@ function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments, - fvec_in.pdf[ivpa,1,iz,ir,is]*fvec_in.density_neutral[iz,ir,is]) end end + end +end + +""" +update the evolved pdf for each neutral species to account for charge exchange collisions +between ions and neutrals +""" +function neutral_charge_exchange_collisions_1V!(f_neutral_out, fvec_in, moments, + composition, vpa, vz, + charge_exchange_frequency, vpa_spectral, + vz_spectral, dt) + # This routine assumes a 1D model with: + 
# nvz = nvpa and identical vz and vpa grids - begin_sn_r_z_region(no_synchronize=true) + if moments.evolve_density + begin_sn_r_z_region() + @loop_sn isn begin + # apply CX collisions to all neutral species + # for each neutral species, obtain affect of charge exchange collisions + # with the corresponding ion species + @views charge_exchange_collisions_single_species!( + f_neutral_out[:,1,1,:,:,isn], fvec_in.pdf_neutral[:,1,1,:,:,isn], + fvec_in.pdf[:,1,:,:,isn], fvec_in.density[:,:,isn], + fvec_in.uz_neutral[:,:,isn], fvec_in.upar[:,:,isn], + moments.neutral.vth[:,:,isn], moments.ion.vth[:,:,isn], moments, + vz, vpa, charge_exchange_frequency, vpa_spectral, dt) + end + else + begin_sn_r_z_region() @loop_sn isn begin # apply CX collisions to all neutral species # for each neutral species, obtain affect of charge exchange collisions @@ -135,21 +149,10 @@ function charge_exchange_collisions_single_species!(f_out, pdf_in, pdf_other, end end -function charge_exchange_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, f_ion_vrvzvzeta_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, - charge_exchange_frequency, dt) +function ion_charge_exchange_collisions_3V!(f_out, f_neutral_gav_in, fvec_in, composition, + vz, vr, vzeta, vpa, vperp, z, r, + charge_exchange_frequency, dt) # This routine assumes a 3V model with: - @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) - @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) - @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) - @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) - @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) - @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) - @boundscheck vz.n == size(f_ion_vrvzvzeta_in,1) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck vr.n == 
size(f_ion_vrvzvzeta_in,2) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck vzeta.n == size(f_ion_vrvzvzeta_in,3) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck z.n == size(f_ion_vrvzvzeta_in,4) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck r.n == size(f_ion_vrvzvzeta_in,5) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck composition.n_neutral_species == size(f_ion_vrvzvzeta_in,6) || throw(BoundsError(f_ion_vrvzvzeta_in)) @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out)) @boundscheck vperp.n == size(f_out,2) || throw(BoundsError(f_out)) @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out)) @@ -173,6 +176,26 @@ function charge_exchange_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, - fvec_in.pdf[ivpa,ivperp,iz,ir,is]*fvec_in.density_neutral[iz,ir,isn]) end end +end + +function neutral_charge_exchange_collisions_3V!(f_neutral_out, f_ion_vrvzvzeta_in, + fvec_in, composition, vz, vr, vzeta, vpa, + vperp, z, r, charge_exchange_frequency, + dt) + # This routine assumes a 3V model with: + @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) + @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) + @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) + @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) + @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) + @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) + @boundscheck vz.n == size(f_ion_vrvzvzeta_in,1) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck vr.n == size(f_ion_vrvzvzeta_in,2) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck vzeta.n == size(f_ion_vrvzvzeta_in,3) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck z.n == size(f_ion_vrvzvzeta_in,4) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck r.n == 
size(f_ion_vrvzvzeta_in,5) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck composition.n_neutral_species == size(f_ion_vrvzvzeta_in,6) || throw(BoundsError(f_ion_vrvzvzeta_in)) + begin_sn_r_z_vzeta_vr_vz_region() @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin # apply CX collisions to all neutral species diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 5d5531d84..d034a2dd0 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -77,6 +77,14 @@ struct coordinate{T <: AbstractVector{mk_float}} scratch2::Array{mk_float,1} # scratch3 is an array used for intermediate calculations requiring n entries scratch3::Array{mk_float,1} + # scratch4 is an array used for intermediate calculations requiring n entries + scratch4::Array{mk_float,1} + # scratch5 is an array used for intermediate calculations requiring n entries + scratch5::Array{mk_float,1} + # scratch6 is an array used for intermediate calculations requiring n entries + scratch6::Array{mk_float,1} + # scratch7 is an array used for intermediate calculations requiring n entries + scratch7::Array{mk_float,1} # scratch_shared is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared::T @@ -221,10 +229,12 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing coord = coordinate(input.name, n_global, n_local, input.ngrid, input.nelement_global, input.nelement_local, input.nrank, input.irank, input.L, grid, cell_width, igrid, ielement, imin, imax, igrid_full, input.discretization, input.fd_option, input.cheb_option, - input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), scratch_shared, scratch_shared2, - scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, input.comm, - local_io_range, global_io_range, element_scale, element_shift, input.element_spacing_option, - element_boundaries, radau_first_element, 
other_nodes, one_over_denominator) + input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), + copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), + scratch_shared, scratch_shared2, scratch_2d, copy(scratch_2d), advection, + send_buffer, receive_buffer, input.comm, local_io_range, global_io_range, + element_scale, element_shift, input.element_spacing_option, element_boundaries, + radau_first_element, other_nodes, one_over_denominator) if coord.n == 1 && occursin("v", coord.name) spectral = null_velocity_dimension_info() @@ -242,7 +252,8 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing elseif input.discretization == "gausslegendre_pseudospectral" # create arrays needed for explicit GaussLegendre pseudospectral treatment in this # coordinate and create the matrices for differentiation - spectral = setup_gausslegendre_pseudospectral(coord, collision_operator_dim=collision_operator_dim) + spectral = setup_gausslegendre_pseudospectral(coord, collision_operator_dim=collision_operator_dim, + dirichlet_bc=occursin("zero", coord.bc)) # obtain the local derivatives of the uniform grid with respect to the used grid derivative!(coord.duniform_dgrid, coord.uniform_grid, coord, spectral) else diff --git a/moment_kinetics/src/derivatives.jl b/moment_kinetics/src/derivatives.jl index e85e91158..c3e2c0523 100644 --- a/moment_kinetics/src/derivatives.jl +++ b/moment_kinetics/src/derivatives.jl @@ -149,6 +149,33 @@ dfns (ion) -> [vpa,vperp,z,r,s] dfns (neutrals) -> [vz,vr,vzeta,z,r,sn] """ +#df/dz +#1D version for f[z], used by implicit solvers +function derivative_z!(dfdz::AbstractArray{mk_float,1}, f::AbstractArray{mk_float,1}, + dfdz_lower_endpoints::AbstractArray{mk_float,0}, + dfdz_upper_endpoints::AbstractArray{mk_float,0}, + z_send_buffer::AbstractArray{mk_float,0}, + z_receive_buffer::AbstractArray{mk_float,0}, z_spectral, z) + + begin_serial_region() + + @serial_region begin + # differentiate f w.r.t z + 
derivative!(dfdz, f, z, z_spectral) + # get external endpoints to reconcile via MPI + dfdz_lower_endpoints[] = z.scratch_2d[1,1] + dfdz_upper_endpoints[] = z.scratch_2d[end,end] + end + + # now reconcile element boundaries across + # processes with large message involving all y + if z.nelement_local < z.nelement_global + reconcile_element_boundaries_MPI!( + dfdz, dfdz_lower_endpoints, dfdz_upper_endpoints, z_send_buffer, + z_receive_buffer, z) + end +end + #df/dz #2D version for f[z,r] -> Er, Ez, phi function derivative_z!(dfdz::AbstractArray{mk_float,2}, f::AbstractArray{mk_float,2}, diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index bae07d9bc..1aea757ce 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -55,7 +55,7 @@ moments & fields only struct io_moments_info{Tfile, Ttime, Tphi, Tmomi, Tmomn, Tchodura_lower, Tchodura_upper, Texti1, Texti2, Texti3, Texti4, Texti5, Textn1, Textn2, Textn3, Textn4, Textn5, Tconstri, Tconstrn, - Tint, Tfailcause} + Tint, Tfailcause, Tnldiagnostics} # file identifier for the binary file to which data is written fid::Tfile # handle for the time variable @@ -126,6 +126,10 @@ struct io_moments_info{Tfile, Ttime, Tphi, Tmomi, Tmomn, Tchodura_lower, # Last successful timestep before most recent timestep failure, used by adaptve # timestepping algorithm dt_before_last_fail::Ttime + # Variables recording diagnostic information about non-linear solvers (used for + # implicit parts of timestep). These are stored in nested NamedTuples so that we can + # write diagnostics generically for as many nonlinear solvers as are created. + nl_solver_diagnostics::Tnldiagnostics # Use parallel I/O? 
parallel_io::Bool @@ -194,7 +198,7 @@ open the necessary output files function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, restart_time_index, - previous_runs_info, time_for_setup) + previous_runs_info, time_for_setup, t_params, nl_solver_params) begin_serial_region() @serial_region begin # Only read/write from first process in each 'block' @@ -222,13 +226,14 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe external_source_settings, input_dict, io_input.parallel_io, comm_inter_block[], run_id, restart_time_index, previous_runs_info, - time_for_setup) + time_for_setup, t_params, nl_solver_params) io_dfns = setup_dfns_io(out_prefix, io_input.binary_format, boundary_distributions, r, z, vperp, vpa, vzeta, vr, vz, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, io_input.parallel_io, comm_inter_block[], run_id, - restart_time_index, previous_runs_info, time_for_setup) + restart_time_index, previous_runs_info, time_for_setup, + t_params, nl_solver_params) return ascii, io_moments, io_dfns end @@ -644,7 +649,8 @@ define dynamic (time-evolving) moment variables for writing to the hdf5 file function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, r::coordinate, z::coordinate, parallel_io, external_source_settings, evolve_density, - evolve_upar, evolve_ppar) + evolve_upar, evolve_ppar, t_params, + nl_solver_params) @serial_region begin dynamic = create_io_group(fid, "dynamic_data", description="time evolving variables") @@ -694,19 +700,13 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, dynamic, "failure_counter", mk_int; parallel_io=parallel_io, description="cumulative number of timestep failures for the run") - n_failure_vars = 1 + evolve_density + evolve_upar + evolve_ppar - if 
n_neutral_species > 0 - n_failure_vars *= 2 - end + n_failure_vars = length(t_params.failure_caused_by) io_failure_caused_by = create_dynamic_variable!( dynamic, "failure_caused_by", mk_int; diagnostic_var_size=n_failure_vars, parallel_io=parallel_io, description="cumulative count of how many times each variable caused a " * "timestep failure for the run") - n_limit_vars = 5 + 2 - if n_neutral_species > 0 - n_limit_vars += 2 - end + n_limit_vars = length(t_params.limit_caused_by) io_limit_caused_by = create_dynamic_variable!( dynamic, "limit_caused_by", mk_int; diagnostic_var_size=n_limit_vars, parallel_io=parallel_io, @@ -718,6 +718,21 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, description="Last successful timestep before most recent timestep failure, " * "used by adaptve timestepping algorithm") + io_nl_solver_diagnostics = NamedTuple( + term=>(n_solves=create_dynamic_variable!( + dynamic, "$(term)_n_solves", mk_int; parallel_io=parallel_io, + description="Number of nonlinear solves for $term"), + nonlinear_iterations=create_dynamic_variable!( + dynamic, "$(term)_nonlinear_iterations", mk_int; + parallel_io=parallel_io, + description="Number of nonlinear iterations for $term"), + linear_iterations=create_dynamic_variable!( + dynamic, "$(term)_linear_iterations", mk_int; + parallel_io=parallel_io, + description="Number of linear iterations for $term"), + ) + for (term, params) ∈ pairs(nl_solver_params) if params !== nothing) + return io_moments_info(fid, io_time, io_phi, io_Er, io_Ez, io_density, io_upar, io_ppar, io_pperp, io_qpar, io_vth, io_dSdt, io_chodura_lower, io_chodura_upper, io_density_neutral, io_uz_neutral, @@ -740,7 +755,8 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, neutral_constraints_C_coefficient, io_time_for_run, io_step_counter, io_dt, io_failure_counter, io_failure_caused_by, - io_limit_caused_by, io_dt_before_last_fail, parallel_io) + io_limit_caused_by, 
io_dt_before_last_fail, io_nl_solver_diagnostics, + parallel_io) end # For processes other than the root process of each shared-memory group... @@ -1073,7 +1089,8 @@ file """ function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, composition, parallel_io, external_source_settings, - evolve_density, evolve_upar, evolve_ppar) + evolve_density, evolve_upar, evolve_ppar, t_params, + nl_solver_params) @serial_region begin io_moments = define_dynamic_moment_variables!(fid, composition.n_ion_species, @@ -1081,7 +1098,8 @@ function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, com parallel_io, external_source_settings, evolve_density, evolve_upar, - evolve_ppar) + evolve_ppar, t_params, + nl_solver_params) dynamic = get_group(fid, "dynamic_data") @@ -1152,7 +1170,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, parallel_io, io_comm, run_id, restart_time_index, previous_runs_info, - time_for_setup) + time_for_setup, t_params, nl_solver_params) @serial_region begin moments_prefix = string(prefix, ".moments") if !parallel_io @@ -1182,7 +1200,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z io_moments = define_dynamic_moment_variables!( fid, composition.n_ion_species, composition.n_neutral_species, r, z, parallel_io, external_source_settings, evolve_density, evolve_upar, - evolve_ppar) + evolve_ppar, t_params, nl_solver_params) close(fid) @@ -1206,6 +1224,15 @@ function reopen_moments_io(file_info) function getvar(name) if name ∈ variable_list return dyn[name] + elseif name == "nl_solver_diagnostics" + nl_names = (name for name ∈ variable_list + if occursin("_nonlinear_iterations", name)) + nl_prefixes = (split(name, "_nonlinear_iterations")[1] + for name ∈ nl_names) + return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"], + 
nonlinear_iterations=dyn["$(term)_nonlinear_iterations"], + linear_iterations=dyn["$(term)_linear_iterations"]) + for term ∈ nl_prefixes) else return nothing end @@ -1238,7 +1265,8 @@ function reopen_moments_io(file_info) getvar("time_for_run"), getvar("step_counter"), getvar("dt"), getvar("failure_counter"), getvar("failure_caused_by"), getvar("limit_caused_by"), - getvar("dt_before_last_fail"), parallel_io) + getvar("dt_before_last_fail"), + getvar("nl_solver_diagnostics"), parallel_io) end # For processes other than the root process of each shared-memory group... @@ -1252,7 +1280,7 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper vzeta, vr, vz, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, parallel_io, io_comm, run_id, restart_time_index, - previous_runs_info, time_for_setup) + previous_runs_info, time_for_setup, t_params, nl_solver_params) @serial_region begin dfns_prefix = string(prefix, ".dfns") @@ -1288,7 +1316,8 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper ### in a struct for later access ### io_dfns = define_dynamic_dfn_variables!( fid, r, z, vperp, vpa, vzeta, vr, vz, composition, parallel_io, - external_source_settings, evolve_density, evolve_upar, evolve_ppar) + external_source_settings, evolve_density, evolve_upar, evolve_ppar, t_params, + nl_solver_params) close(fid) @@ -1312,6 +1341,15 @@ function reopen_dfns_io(file_info) function getvar(name) if name ∈ variable_list return dyn[name] + elseif name == "nl_solver_diagnostics" + nl_names = (name for name ∈ variable_list + if occursin("_nonlinear_iterations", name)) + nl_prefixes = (split(name, "_nonlinear_iterations")[1] + for name ∈ nl_names) + return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"], + nonlinear_iterations=dyn["$(term)_nonlinear_iterations"], + linear_iterations=dyn["$(term)_linear_iterations"]) + for term ∈ nl_prefixes) else return nothing end 
@@ -1346,7 +1384,8 @@ function reopen_dfns_io(file_info) getvar("dt"), getvar("failure_counter"), getvar("failure_caused_by"), getvar("limit_caused_by"), - getvar("dt_before_last_fail"), parallel_io) + getvar("dt_before_last_fail"), + getvar("nl_solver_diagnostics"), parallel_io) return io_dfns_info(fid, getvar("f"), getvar("f_neutral"), parallel_io, io_moments) @@ -1382,7 +1421,9 @@ write time-dependent moments data for ions and neutrals to the binary output fil """ function write_all_moments_data_to_binary(moments, fields, t, n_ion_species, n_neutral_species, io_or_file_info_moments, - t_idx, time_for_run, t_params, r, z) + t_idx, time_for_run, t_params, nl_solver_params, + r, z) + @serial_region begin # Only read/write from first process in each 'block' @@ -1419,6 +1460,17 @@ function write_all_moments_data_to_binary(moments, fields, t, n_ion_species, only_root=true) append_to_dynamic_var(io_moments.dt_before_last_fail, t_params.dt_before_last_fail[], t_idx, parallel_io) + for (k,v) ∈ pairs(nl_solver_params) + if v === nothing + continue + end + append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].n_solves, + v.n_solves[], t_idx, parallel_io) + append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].nonlinear_iterations, + v.nonlinear_iterations[], t_idx, parallel_io) + append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].linear_iterations, + v.linear_iterations[], t_idx, parallel_io) + end closefile && close(io_moments.fid) end @@ -1619,8 +1671,8 @@ binary output file """ function write_all_dfns_data_to_binary(pdf, moments, fields, t, n_ion_species, n_neutral_species, io_or_file_info_dfns, t_idx, - time_for_run, t_params, r, z, vperp, vpa, vzeta, vr, - vz) + time_for_run, t_params, nl_solver_params, r, z, + vperp, vpa, vzeta, vr, vz) @serial_region begin # Only read/write from first process in each 'block' @@ -1636,7 +1688,7 @@ function write_all_dfns_data_to_binary(pdf, moments, fields, t, n_ion_species, # This also updates the time. 
write_all_moments_data_to_binary(moments, fields, t, n_ion_species, n_neutral_species, io_dfns.io_moments, t_idx, - time_for_run, t_params, r, z) + time_for_run, t_params, nl_solver_params, r, z) # add the distribution function data at this time slice to the output file write_ion_dfns_data_to_binary(pdf.ion.norm, n_ion_species, io_dfns, t_idx, r, z, @@ -1901,7 +1953,7 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor #qr_neutral=nothing, qzeta_neutral=nothing, vth_neutral=nothing, phi=nothing, Er=nothing, Ez=nothing, - istage=0, label="") + istage=0, label="", t_params=nothing, nl_solver_params=()) global debug_output_file # Only read/write from first process in each 'block' @@ -1933,11 +1985,12 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor r, z, false, external_source_settings, evolve_density, evolve_upar, - evolve_ppar) + evolve_ppar, t_params, + nl_solver_params) io_dfns = define_dynamic_dfn_variables!( fid, r, z, vperp, vpa, vzeta, vr, vz, composition.n_ion_species, composition.n_neutral_species, false, external_source_settings, - evolve_density, evolve_upar, evolve_ppar) + evolve_density, evolve_upar, evolve_ppar, t_params, nl_solver_params) # create the "istage" variable, used to identify the rk stage where # `debug_dump()` was called diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 24a0b925f..539a5fd86 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -100,7 +100,7 @@ struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info Qmat::Array{mk_float,2} end -function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) +function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, dirichlet_bc=true) lobatto = setup_gausslegendre_pseudospectral_lobatto(coord,collision_operator_dim=collision_operator_dim) radau = 
setup_gausslegendre_pseudospectral_radau(coord,collision_operator_dim=collision_operator_dim) @@ -114,9 +114,9 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) K_matrix = allocate_float(coord.n,coord.n) L_matrix = allocate_float(coord.n,coord.n) - setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M") - setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms") - setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms") + setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; dirichlet_bc=dirichlet_bc) + setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; dirichlet_bc=dirichlet_bc) + setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms"; dirichlet_bc=dirichlet_bc) mass_matrix_lu = lu(sparse(mass_matrix)) Qmat = allocate_float(coord.ngrid,coord.ngrid) @@ -835,7 +835,7 @@ where M is the mass matrix and K is the stiffness matrix. 
function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, lobatto::gausslegendre_base_info, radau::gausslegendre_base_info, - coord,option) + coord,option; dirichlet_bc=false) QQ_j = allocate_float(coord.ngrid,coord.ngrid) QQ_jp1 = allocate_float(coord.ngrid,coord.ngrid) @@ -883,6 +883,19 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:]./2.0 end end + + if dirichlet_bc + # Make matrix diagonal for first/last grid points so it does not change the values + # there + if coord.irank == 0 + QQ_global[1,:] .= 0.0 + QQ_global[1,1] = 1.0 + end + if coord.irank == coord.nrank - 1 + QQ_global[end,:] .= 0.0 + QQ_global[end,end] = 1.0 + end + end return nothing end diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index fe7661e9a..f81d28a30 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -38,7 +38,8 @@ end an option but known at compile time when a `time_info` struct is passed as a function argument. 
""" -struct time_info{Terrorsum <: Real} +struct time_info{Terrorsum <: Real, Trkimp, Timpzero} + n_variables::mk_int nstep::mk_int end_time::mk_float dt::MPISharedArray{mk_float,1} @@ -52,10 +53,14 @@ struct time_info{Terrorsum <: Real} failure_counter::Ref{mk_int} failure_caused_by::Vector{mk_int} limit_caused_by::Vector{mk_int} + nwrite_moments::mk_int + nwrite_dfns::mk_int moments_output_times::Vector{mk_float} dfns_output_times::Vector{mk_float} type::String rk_coefs::Array{mk_float,2} + rk_coefs_implicit::Trkimp + implicit_coefficient_is_zero::Timpzero n_rk_stages::mk_int rk_order::mk_int adaptive::Bool @@ -69,6 +74,9 @@ struct time_info{Terrorsum <: Real} last_fail_proximity_factor::mk_float minimum_dt::mk_float maximum_dt::mk_float + implicit_ion_advance::Bool + implicit_vpa_advection::Bool + write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool steady_state_residual::Bool @@ -87,15 +95,20 @@ mutable struct advance_info neutral_z_advection::Bool neutral_r_advection::Bool neutral_vz_advection::Bool - cx_collisions::Bool - cx_collisions_1V::Bool - ionization_collisions::Bool - ionization_collisions_1V::Bool + ion_cx_collisions::Bool + neutral_cx_collisions::Bool + ion_cx_collisions_1V::Bool + neutral_cx_collisions_1V::Bool + ion_ionization_collisions::Bool + neutral_ionization_collisions::Bool + ion_ionization_collisions_1V::Bool + neutral_ionization_collisions_1V::Bool ionization_source::Bool krook_collisions_ii::Bool explicit_weakform_fp_collisions::Bool external_source::Bool - numerical_dissipation::Bool + ion_numerical_dissipation::Bool + neutral_numerical_dissipation::Bool source_terms::Bool continuity::Bool force_balance::Bool diff --git a/moment_kinetics/src/ionization.jl b/moment_kinetics/src/ionization.jl index a4e7ac3f1..babdd5d9b 100644 --- a/moment_kinetics/src/ionization.jl +++ b/moment_kinetics/src/ionization.jl @@ -2,8 +2,10 @@ """ module ionization -export ionization_collisions_1V! -export ionization_collisions_3V! 
+export ion_ionization_collisions_1V! +export neutral_ionization_collisions_1V! +export ion_ionization_collisions_3V! +export neutral_ionization_collisions_3V! export constant_ionization_source! using ..interpolation: interpolate_to_grid_vpa! @@ -64,18 +66,12 @@ function constant_ionization_source!(f_out, fvec_in, vpa, vperp, z, r, moments, end end -function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp, z, r, - vz_spectral, moments, composition, collisions, dt) +function ion_ionization_collisions_1V!(f_out, fvec_in, vz, vpa, vperp, z, r, vz_spectral, + moments, composition, collisions, dt) # This routine assumes a 1D model with: # nvz = nvpa and identical vz and vpa grids # nvperp = nvr = nveta = 1 # constant charge_exchange_frequency independent of species - @boundscheck vpa.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) - @boundscheck 1 == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) - @boundscheck 1 == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) - @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) - @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) - @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out)) @boundscheck 1 == size(f_out,2) || throw(BoundsError(f_out)) @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out)) @@ -83,8 +79,6 @@ function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp @boundscheck composition.n_ion_species == size(f_out,5) || throw(BoundsError(f_out)) - # keep vpa vperp vz vr vzeta local so that - # vpa loop below can also be used for vz begin_r_z_vpa_region() if moments.evolve_density @@ -155,21 +149,44 @@ function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp @loop_r_z_vpa ir iz ivpa begin # apply ionization collisions to all ion species 
f_out[ivpa,1,iz,ir,is] += dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is] - # apply ionization collisions to all neutral species - f_neutral_out[ivpa,1,1,iz,ir,isn] -= dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is] end end end end -function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) - # This routine assumes a 3V model with: - @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) - @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) - @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) +function neutral_ionization_collisions_1V!(f_neutral_out, fvec_in, vz, vpa, vperp, z, r, + vz_spectral, moments, composition, collisions, dt) + # This routine assumes a 1D model with: + # nvz = nvpa and identical vz and vpa grids + # nvperp = nvr = nveta = 1 + # constant charge_exchange_frequency independent of species + @boundscheck vpa.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) + @boundscheck 1 == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) + @boundscheck 1 == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) + + if !moments.evolve_density + begin_r_z_vpa_region() + + @loop_s is begin + # ion ionisation rate = < f_n > n_e R_ion + # neutral "ionisation" (depopulation) rate = - f_n n_e R_ion + # no gyroaverage here as 1V code + #NB: used quasineutrality to replace electron density n_e with ion density + #NEEDS GENERALISATION TO n_ion_species > 1 (missing species charge: Sum_i Z_i n_i = n_e) + isn = is + @loop_r_z_vpa ir 
iz ivpa begin + # apply ionization collisions to all neutral species + f_neutral_out[ivpa,1,1,iz,ir,isn] -= dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is] + end + end + end +end + +function ion_ionization_collisions_3V!(f_out, f_neutral_gav_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + # This routine assumes a 3V model with: @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out)) @boundscheck vperp.n == size(f_out,2) || throw(BoundsError(f_out)) @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out)) @@ -185,21 +202,6 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_ begin_s_r_z_vperp_vpa_region() - # #if collisions.constant_ionization_rate - # # ## Oddly the test in test/harrisonthompson.jl matches the analitical - # # ## solution (which assumes width=0.0) better with width=0.5 than with, - # # ## e.g., width=0.15. Possibly narrower widths would require more vpa - # # ## resolution, which then causes crashes due to overshoots giving - # # ## negative f?? 
- # # #width = 0.5 - # # #@loop_s is begin - # # # #@loop_r_z_vperp_vpa ir iz ivperp ivpa begin - # # # # #f_out[ivpa,ivperp,iz,ir,is] += dt*collisions.ionization/width^3*exp(-((vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)/width^2)) - # # # #end - # # #end - # # #return nothing - # #end - # ion ionization rate = < f_n > n_e R_ion # neutral "ionization" (depopulation) rate = - f_n n_e R_ion #NB: used quasineutrality to replace electron density n_e with ion density @@ -213,6 +215,23 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_ end end end +end + +function neutral_ionization_collisions_3V!(f_neutral_out, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + # This routine assumes a 3V model with: + @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) + @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) + @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) + @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) + @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) + @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) + + ionization_frequency = collisions.ionization + + # ion ionization rate = < f_n > n_e R_ion + # neutral "ionization" (depopulation) rate = - f_n n_e R_ion + #NB: used quasineutrality to replace electron density n_e with ion density + #NEEDS GENERALISATION TO n_ion_species > 1 (missing species charge: Sum_i Z_i n_i = n_e) begin_sn_r_z_vzeta_vr_vz_region() @loop_sn isn begin for is ∈ 1:composition.n_ion_species @@ -222,7 +241,6 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_ end end end - end end diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 91992e9c6..430edb5a9 100644 --- a/moment_kinetics/src/load_data.jl +++ 
b/moment_kinetics/src/load_data.jl @@ -2722,6 +2722,9 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin vz_chunk_size = 1 end + # Get variable names just from the first restart, for simplicity + variable_names = get_variable_keys(get_group(fids0[1], "dynamic_data")) + if parallel_io files = fids0 else @@ -2733,9 +2736,9 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin run_info = (run_name=run_name, run_prefix=base_prefix, parallel_io=parallel_io, ext=ext, nblocks=nblocks, files=files, input=input, n_ion_species=n_ion_species, n_neutral_species=n_neutral_species, - evolve_moments=evolve_moments, composition=composition, species=species, - collisions=collisions, geometry=geometry, drive_input=drive_input, - num_diss_params=num_diss_params, + evolve_moments=evolve_moments, t_input=t_input, composition=composition, + species=species, collisions=collisions, geometry=geometry, + drive_input=drive_input, num_diss_params=num_diss_params, external_source_settings=external_source_settings, evolve_density=evolve_density, evolve_upar=evolve_upar, evolve_ppar=evolve_ppar, @@ -2749,7 +2752,8 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin vz_spectral=vz_spectral, r_chunk_size=r_chunk_size, z_chunk_size=z_chunk_size, vperp_chunk_size=vperp_chunk_size, vpa_chunk_size=vpa_chunk_size, vzeta_chunk_size=vzeta_chunk_size, - vr_chunk_size=vr_chunk_size, vz_chunk_size=vz_chunk_size, dfns=dfns) + vr_chunk_size=vr_chunk_size, vz_chunk_size=vz_chunk_size, + variable_names=variable_names, dfns=dfns) return run_info end @@ -3283,6 +3287,11 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t speed = allocate_float(nz, nvpa, nvperp, nr, nspecies, nt) Er = get_variable(run_info, "Er") + gEr = allocate_float(nvperp, nz, nr, nspecies, nt) + for it ∈ 1:nt, is ∈ 1:nspecies, ir ∈ 1:nr, iz ∈ 1:nz + # Don't support gyroaveraging here (yet) + gEr[:,iz,ir,is,it] .= 
Er[iz,ir,it] + end setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, @@ -3293,11 +3302,11 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t # Only need some struct with a 'speed' variable advect = (speed=@view(speed[:,:,:,:,is,it]),) # Only need Er - fields = (Er=@view(Er[:,:,it]),) + fields = (gEr=@view(gEr[:,:,:,is,it]),) @views update_speed_z!(advect, upar[:,:,is,it], vth[:,:,is,it], run_info.evolve_upar, run_info.evolve_ppar, fields, run_info.vpa, run_info.vperp, run_info.z, run_info.r, - run_info.time[it], run_info.geometry) + run_info.time[it], run_info.geometry, is) end # Horrible hack so that we can get the speed back without rearranging the @@ -3331,9 +3340,6 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t end end elseif variable_name == "vpa_advect_speed" - # update_speed_z!() requires all dimensions to be present, so do *not* pass kwargs - # to get_variable() in this case. Instead select a slice of the result. 
- Ez = get_variable(run_info, "Ez") density = get_variable(run_info, "density") upar = get_variable(run_info, "parallel_flow") ppar = get_variable(run_info, "parallel_pressure") @@ -3347,9 +3353,21 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t dqpar_dz = get_z_derivative(run_info, "parallel_heat_flux") if run_info.external_source_settings.ion.active external_source_amplitude = get_variable(run_info, "external_source_amplitude") - external_source_density_amplitude = get_variable(run_info, "external_source_density_amplitude") - external_source_momentum_amplitude = get_variable(run_info, "external_source_momentum_amplitude") - external_source_pressure_amplitude = get_variable(run_info, "external_source_pressure_amplitude") + if run_info.evolve_density + external_source_density_amplitude = get_variable(run_info, "external_source_density_amplitude") + else + external_source_density_amplitude = zeros(0,0,run_info.nt) + end + if run_info.evolve_upar + external_source_momentum_amplitude = get_variable(run_info, "external_source_momentum_amplitude") + else + external_source_momentum_amplitude = zeros(0,0,run_info.nt) + end + if run_info.evolve_ppar + external_source_pressure_amplitude = get_variable(run_info, "external_source_pressure_amplitude") + else + external_source_pressure_amplitude = zeros(0,0,run_info.nt) + end else external_source_amplitude = zeros(0,0,run_info.nt) external_source_density_amplitude = zeros(0,0,run_info.nt) @@ -3361,6 +3379,15 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t nvperp = run_info.vperp.n nvpa = run_info.vpa.n + # update_speed_z!() requires all dimensions to be present, so do *not* pass kwargs + # to get_variable() in this case. Instead select a slice of the result. 
+ Ez = get_variable(run_info, "Ez") + gEz = allocate_float(nvperp, nz, nr, nspecies, nt) + for it ∈ 1:nt, is ∈ 1:nspecies, ir ∈ 1:nr, iz ∈ 1:nz + # Don't support gyroaveraging here (yet) + gEz[:,iz,ir,is,it] .= Ez[iz,ir,it] + end + speed=allocate_float(nvpa, nvperp, nz, nr, nspecies, nt) setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, @@ -3371,7 +3398,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t # Only need some struct with a 'speed' variable advect = [(speed=@view(speed[:,:,:,:,is,it]),) for is ∈ 1:nspecies] # Only need Ez - fields = (Ez=@view(Ez[:,:,it]),) + fields = (gEz=@view(gEz[:,:,:,:,it]),) @views moments = (ion=(dppar_dz=dppar_dz[:,:,:,it], dupar_dz=dupar_dz[:,:,:,it], dvth_dz=dvth_dz[:,:,:,it], @@ -3411,6 +3438,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t speed = allocate_float(nz, nvz, nvr, nvzeta, nr, nspecies, nt) + setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, vperp=run_info.vperp.n, vpa=run_info.vpa.n, vzeta=nvzeta, vr=nvr, vz=nvz) @@ -3474,9 +3502,21 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t dqz_dz = get_z_derivative(run_info, "qz_neutral") if run_info.external_source_settings.neutral.active external_source_amplitude = get_variable(run_info, "external_source_neutral_amplitude") - external_source_density_amplitude = get_variable(run_info, "external_source_neutral_density_amplitude") - external_source_momentum_amplitude = get_variable(run_info, "external_source_neutral_momentum_amplitude") - external_source_pressure_amplitude = get_variable(run_info, "external_source_neutral_pressure_amplitude") + if run_info.evolve_density + external_source_density_amplitude = get_variable(run_info, "external_source_neutral_density_amplitude") + else + external_source_density_amplitude = 
zeros(0,0,run_info.nt) + end + if run_info.evolve_upar + external_source_momentum_amplitude = get_variable(run_info, "external_source_neutral_momentum_amplitude") + else + external_source_momentum_amplitude = zeros(0,0,run_info.nt) + end + if run_info.evolve_ppar + external_source_pressure_amplitude = get_variable(run_info, "external_source_neutral_pressure_amplitude") + else + external_source_pressure_amplitude = zeros(0,0,run_info.nt) + end else external_source_amplitude = zeros(0,0,run_info.nt) external_source_density_amplitude = zeros(0,0,run_info.nt) @@ -3490,6 +3530,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t nvz = run_info.vz.n speed = allocate_float(nvz, nvr, nvzeta, nz, nr, nspecies, nt) + setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, vperp=run_info.vperp.n, vpa=run_info.vpa.n, vzeta=nvzeta, vr=nvr, vz=nvz) @@ -3676,6 +3717,20 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t variable[it] = min_CFL end variable = select_slice_of_variable(variable; kwargs...) + elseif occursin("_nonlinear_iterations_per_solve", variable_name) + prefix = split(variable_name, "_nonlinear_iterations_per_solve")[1] + nl_nsolves = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_n_solves"; kwargs...) + nl_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_nonlinear_iterations"; kwargs...) + variable = nl_iterations ./ nl_nsolves + elseif occursin("_linear_iterations_per_nonlinear_iteration", variable_name) + prefix = split(variable_name, "_linear_iterations_per_nonlinear_iteration")[1] + nl_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_nonlinear_iterations"; kwargs...) + nl_linear_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_linear_iterations"; kwargs...) 
+ variable = nl_linear_iterations ./ nl_iterations else variable = postproc_load_variable(run_info, variable_name; kwargs...) end diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index ae49821dd..f8c0a2274 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -7,6 +7,7 @@ module moment_constraints using ..communication: _block_synchronize using ..looping +using ..type_definitions: mk_float using ..velocity_moments: integrate_over_vspace, update_qpar! export hard_force_moment_constraints!, hard_force_moment_constraints_neutral! @@ -75,12 +76,26 @@ function hard_force_moment_constraints!(f, moments, vpa) A = 1.0 / I0 @. f1d = A * f1d + B = NaN + C = NaN + else + A = NaN B = NaN C = NaN end return A, B, C end +function hard_force_moment_constraints!(f::AbstractArray{mk_float,5}, moments, vpa) + A = moments.ion.constraints_A_coefficient + B = moments.ion.constraints_B_coefficient + C = moments.ion.constraints_C_coefficient + begin_s_r_z_region() + @loop_s_r_z is ir iz begin + A[iz,ir,is], B[iz,ir,is], C[iz,ir,is] = + hard_force_moment_constraints!(@view(f[:,:,iz,ir,is]), moments, vpa) + end +end """ hard_force_moment_constraints_neutral!(f, moments, vz) @@ -125,6 +140,88 @@ function hard_force_moment_constraints_neutral!(f, moments, vz) A = 1.0 / I0 @. 
f1d = A * f1d + B = NaN + C = NaN + else + A = NaN + B = NaN + C = NaN + end + + return A, B, C +end +# Apply the single-point version at every (iz, ir, isn), storing the returned coefficients +# in `moments.neutral.constraints_{A,B,C}_coefficient`. The species index is `isn`. +function hard_force_moment_constraints_neutral!(f::AbstractArray{mk_float,6}, moments, vz) + A = moments.neutral.constraints_A_coefficient + B = moments.neutral.constraints_B_coefficient + C = moments.neutral.constraints_C_coefficient + begin_sn_r_z_region() + @loop_sn_r_z isn ir iz begin + A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn] = + hard_force_moment_constraints_neutral!(@view(f[:,:,:,iz,ir,isn]), moments, vz) + end +end + +""" + moment_constraints_on_residual!(residual, f, moments, vpa) + +A 'residual' (used in implicit timestepping) is an update to the distribution function +\$f_\\mathrm{new} = f_\\mathrm{old} + \\mathtt{residual}\$. \$f_\\mathrm{new}\$ should +obey the moment constraints ([Constraints on normalized distribution function](@ref)), and +\$f_\\mathrm{old}\$ already obeys the constraints, which means that the first 3 moments of +`residual` should be zero. We impose this constraint by adding corrections proportional to +`f`. +```math +r = \\hat{r} + (A + B w_{\\|} + C w_{\\|}^2) f +``` + +Note this function assumes the input is given at a single spatial position. 
+""" +function moment_constraints_on_residual!(residual::AbstractArray{T,N}, + f::AbstractArray{T,N}, moments, vpa) where {T,N} + if N == 2 + f = @view f[:,1] + residual = @view residual[:,1] + end + if moments.evolve_ppar + I0 = integrate_over_vspace(f, vpa.wgts) + I1 = integrate_over_vspace(f, vpa.grid, vpa.wgts) + I2 = integrate_over_vspace(f, vpa.grid, 2, vpa.wgts) + I3 = integrate_over_vspace(f, vpa.grid, 3, vpa.wgts) + I4 = integrate_over_vspace(f, vpa.grid, 4, vpa.wgts) + J0 = integrate_over_vspace(residual, vpa.wgts) + J1 = integrate_over_vspace(residual, vpa.grid, vpa.wgts) + J2 = integrate_over_vspace(residual, vpa.grid, 2, vpa.wgts) + + A = ((I2*J2 - J0*I4)*(I2*I4 - I3^2) + (I2*I3 - I1*I4)*(J2*I3 - J1*I4)) / + ((I0*I4 - I2^2)*(I2*I4 - I3^2) - (I2*I3 - I1*I4)^2) + B = (J2*I3 - J1*I4 + (I2*I3 - I1*I4)*A) / (I2*I4 - I3^2) + C = -(J2 + I2*A + I3*B) / I4 + + @. residual = residual + (A + B*vpa.grid + C*vpa.grid*vpa.grid) * f + elseif moments.evolve_upar + I0 = integrate_over_vspace(f, vpa.wgts) + I1 = integrate_over_vspace(f, vpa.grid, vpa.wgts) + I2 = integrate_over_vspace(f, vpa.grid, 2, vpa.wgts) + J0 = integrate_over_vspace(residual, vpa.wgts) + J1 = integrate_over_vspace(residual, vpa.grid, vpa.wgts) + + A = (I1*J1 - J0*I2) / (I0*I2 - I1^2) + B = -(J1 + I1*A) / I2 + + @. residual = residual + (A + B*vpa.grid) * f + + C = NaN + elseif moments.evolve_density + I0 = integrate_over_vspace(f, vpa.wgts) + J0 = integrate_over_vspace(residual, vpa.wgts) + A = -J0 / I0 + # Only `residual` is corrected; `f` must stay unchanged (scaling it first would add A^2*f) + @. 
residual = residual + A * f + + B = NaN + C = NaN + else + A = NaN B = NaN C = NaN end diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index b1a07f0eb..90fd7f1bd 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -33,6 +33,7 @@ include("input_structs.jl") include("runge_kutta.jl") include("reference_parameters.jl") include("coordinates.jl") +include("nonlinear_solvers.jl") include("file_io.jl") include("geo.jl") include("gyroaverages.jl") @@ -46,6 +47,7 @@ include("moment_constraints.jl") include("fokker_planck_test.jl") include("fokker_planck_calculus.jl") include("fokker_planck.jl") +include("boundary_conditions.jl") include("advection.jl") include("vpa_advection.jl") include("z_advection.jl") @@ -54,7 +56,6 @@ include("vperp_advection.jl") include("neutral_r_advection.jl") include("neutral_z_advection.jl") include("neutral_vz_advection.jl") -include("boundary_conditions.jl") include("charge_exchange.jl") include("ionization.jl") include("krook_collisions.jl") @@ -318,14 +319,14 @@ function setup_moment_kinetics(input_dict::AbstractDict; # create arrays and do other work needed to setup # the main time advance loop -- including normalisation of f by density if requested - moments, spectral_objects, scratch, advance, t_params, fp_arrays, gyroavs, - manufactured_source_list = + moments, spectral_objects, scratch, scratch_implicit, advance, advance_implicit, + t_params, fp_arrays, gyroavs, manufactured_source_list, nl_solver_params = setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrophase, vz_spectral, vr_spectral, vzeta_spectral, vpa_spectral, vperp_spectral, z_spectral, r_spectral, composition, moments, t_input, code_time, dt, dt_before_last_fail, collisions, species, geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, - advection_structs, scratch_dummy, restarting) + advection_structs, scratch_dummy, 
restarting, input_dict) # This is the closest we can get to the end time of the setup before writing it to the # output file @@ -335,26 +336,28 @@ function setup_moment_kinetics(input_dict::AbstractDict; ascii_io, io_moments, io_dfns = setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r, composition, collisions, moments.evolve_density, moments.evolve_upar, moments.evolve_ppar, external_source_settings, input_dict, - restart_time_index, previous_runs_info, time_for_setup) + restart_time_index, previous_runs_info, time_for_setup, t_params, + nl_solver_params) # write initial data to ascii files write_data_to_ascii(pdf, moments, fields, vpa, vperp, z, r, code_time, composition.n_ion_species, composition.n_neutral_species, ascii_io) # write initial data to binary files write_all_moments_data_to_binary(moments, fields, code_time, - composition.n_ion_species, composition.n_neutral_species, io_moments, 1, 0.0, t_params, r, - z) + composition.n_ion_species, composition.n_neutral_species, io_moments, 1, 0.0, + t_params, nl_solver_params, r, z) write_all_dfns_data_to_binary(pdf, moments, fields, code_time, composition.n_ion_species, composition.n_neutral_species, io_dfns, 1, 0.0, - t_params, r, z, vperp, vpa, vzeta, vr, vz) + t_params, nl_solver_params, r, z, vperp, vpa, vzeta, vr, vz) begin_s_r_z_vperp_region() - return pdf, scratch, code_time, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advection_structs, + return pdf, scratch, scratch_implicit, code_time, t_params, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, moments, fields, spectral_objects, advection_structs, composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, ascii_io, io_moments, io_dfns + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, 
manufactured_source_list, ascii_io, + io_moments, io_dfns end """ diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 56358a27b..82b8b00fb 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -211,6 +211,9 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) last_fail_proximity_factor=1.05, minimum_dt=0.0, maximum_dt=Inf, + implicit_ion_advance=true, + implicit_vpa_advection=false, + write_after_fixed_step_count=false, high_precision_error_sum=false, ) if timestepping_section["nwrite"] > timestepping_section["nstep"] @@ -224,38 +227,37 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) if timestepping_section["atol_upar"] === nothing timestepping_section["atol_upar"] = 1.0e-2 * timestepping_section["rtol"] end - timestepping_input = Dict_to_NamedTuple(timestepping_section) - if !(0.0 < timestepping_input.step_update_prefactor < 1.0) - error("step_update_prefactor=$(timestepping_input.step_update_prefactor) must " + if !(0.0 < timestepping_section["step_update_prefactor"] < 1.0) + error("step_update_prefactor=$(timestepping_section["step_update_prefactor"]) must " * "be between 0.0 and 1.0.") end - if timestepping_input.max_increase_factor ≤ 1.0 - error("max_increase_factor=$(timestepping_input.max_increase_factor) must " + if timestepping_section["max_increase_factor"] ≤ 1.0 + error("max_increase_factor=$(timestepping_section["max_increase_factor"]) must " * "be greater than 1.0.") end - if timestepping_input.max_increase_factor_near_last_fail ≤ 1.0 + if timestepping_section["max_increase_factor_near_last_fail"] ≤ 1.0 error("max_increase_factor_near_last_fail=" - * "$(timestepping_input.max_increase_factor_near_last_fail) must be " + * "$(timestepping_section["max_increase_factor_near_last_fail"]) must be " * "greater than 1.0.") end - if 
!isinf(timestepping_input.max_increase_factor_near_last_fail) && - timestepping_input.max_increase_factor_near_last_fail > timestepping_input.max_increase_factor + if !isinf(timestepping_section["max_increase_factor_near_last_fail"]) && + timestepping_section["max_increase_factor_near_last_fail"] > timestepping_section["max_increase_factor"] error("max_increase_factor_near_last_fail=" - * "$(timestepping_input.max_increase_factor_near_last_fail) should be " + * "$(timestepping_section["max_increase_factor_near_last_fail"]) should be " * "less than max_increase_factor=" - * "$(timestepping_input.max_increase_factor).") + * "$(timestepping_section["max_increase_factor"]).") end - if timestepping_input.last_fail_proximity_factor ≤ 1.0 + if timestepping_section["last_fail_proximity_factor"] ≤ 1.0 error("last_fail_proximity_factor=" - * "$(timestepping_input.last_fail_proximity_factor) must be " + * "$(timestepping_section["last_fail_proximity_factor"]) must be " * "greater than 1.0.") end - if timestepping_input.minimum_dt > timestepping_input.maximum_dt - error("minimum_dt=$(timestepping_input.minimum_dt) must be less than " - * "maximum_dt=$(timestepping_input.maximum_dt)") + if timestepping_section["minimum_dt"] > timestepping_section["maximum_dt"] + error("minimum_dt=$(timestepping_section["minimum_dt"]) must be less than " + * "maximum_dt=$(timestepping_section["maximum_dt"])") end - if timestepping_input.maximum_dt ≤ 0.0 - error("maximum_dt=$(timestepping_input.maximum_dt) must be positive") + if timestepping_section["maximum_dt"] ≤ 0.0 + error("maximum_dt=$(timestepping_section["maximum_dt"]) must be positive") end use_for_init_is_default = !(("manufactured_solns" ∈ keys(scan_input)) && @@ -662,12 +664,12 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) end # check input (and initialized coordinate structs) to catch errors/unsupported options - check_input(io, output_dir, timestepping_input.nstep, timestepping_input.dt, r, z, + 
check_input(io, output_dir, timestepping_section["nstep"], timestepping_section["dt"], r, z, vpa, vperp, composition, species_immutable, evolve_moments, num_diss_params, save_inputs_to_txt, collisions) # return immutable structs for z, vpa, species and composition - all_inputs = (io_immutable, evolve_moments, timestepping_input, z, z_spectral, r, + all_inputs = (io_immutable, evolve_moments, timestepping_section, z, z_spectral, r, r_spectral, vpa, vpa_spectral, vperp, vperp_spectral, gyrophase, gyrophase_spectral, vz, vz_spectral, vr, vr_spectral, vzeta, vzeta_spectral, composition, species_immutable, collisions, geometry, diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl new file mode 100644 index 000000000..79a87fc09 --- /dev/null +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -0,0 +1,932 @@ +""" +Nonlinear solvers, using Jacobian-free Newton-Krylov methods. + +These solvers use an outer Newton iteration. Each step of the Newton iteration requires a +linear solve of the Jacobian. An 'inexact Jacobian' method is used, and the GMRES method +(GMRES is a type of Krylov solver) is used to (approximately) solve the (approximate) +linear system. + +!!! warning "parallelisation" + This module uses shared- and distributed-memory parallelism, so the functions in it + should not be called inside any kind of parallelised loop. This restriction should be + lifted somehow in future... + +`parallel_map()` is used to apply elementwise functions to arbitrary numbers of arguments +using shared-memory parallelism. We do this rather than writing the loops out explicitly +so that `newton_solve!()` and `linear_solve!()` can work for arrays with any combination +of dimensions. + +Useful references: +[1] V.A. Mousseau and D.A. Knoll, "Fully Implicit Kinetic Solution of Collisional Plasmas", Journal of Computational Physics 136, 308–323 (1997), https://doi.org/10.1006/jcph.1997.5736. +[2] V.A. 
Mousseau, "Fully Implicit Kinetic Modelling of Collisional Plasmas", PhD thesis, Idaho National Engineering Laboratory (1996), https://inis.iaea.org/collection/NCLCollectionStore/_Public/27/067/27067141.pdf. +[3] https://en.wikipedia.org/wiki/Generalized_minimal_residual_method +[4] https://www.rikvoorhaar.com/blog/gmres +[5] E. Carson , J. Liesen, Z. Strakoš, "Towards understanding CG and GMRES through examples", Linear Algebra and its Applications 692, 241–291 (2024), https://doi.org/10.1016/j.laa.2024.04.003. +""" +module nonlinear_solvers + +export setup_nonlinear_solve, gather_nonlinear_solver_counters!, + reset_nonlinear_per_stage_counters, newton_solve! + +using ..array_allocation: allocate_float, allocate_shared_float +using ..communication +using ..coordinates: coordinate +using ..input_structs +using ..looping +using ..type_definitions: mk_float, mk_int + +using LinearAlgebra +using MINPACK +using MPI +using SparseArrays + +struct nl_solver_info{TH,TV,Tlig,Tprecon} + rtol::mk_float + atol::mk_float + nonlinear_max_iterations::mk_int + linear_rtol::mk_float + linear_atol::mk_float + linear_restart::mk_int + linear_max_restarts::mk_int + H::TH + V::TV + linear_initial_guess::Tlig + n_solves::Ref{mk_int} + nonlinear_iterations::Ref{mk_int} + linear_iterations::Ref{mk_int} + global_n_solves::Ref{mk_int} + global_nonlinear_iterations::Ref{mk_int} + global_linear_iterations::Ref{mk_int} + stage_counter::Ref{mk_int} + serial_solve::Bool + max_nonlinear_iterations_this_step::Ref{mk_int} + preconditioner_update_interval::mk_int + preconditioners::Tprecon +end + +""" + +`coords` is a NamedTuple of coordinates corresponding to the dimensions of the variable +that will be solved. The entries in `coords` should be ordered the same as the memory +layout of the variable to be solved (i.e. fastest-varying first). 
+ +The nonlinear solver will be called inside a loop over `outer_coords`, so we might need +for example a preconditioner object for each point in that outer loop. +""" +function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol=1.0e-5, + default_atol=1.0e-12, serial_solve=false, + preconditioner_type=nothing) + nl_solver_section = set_defaults_and_check_section!( + input_dict, "nonlinear_solver"; + rtol=default_rtol, + atol=default_atol, + nonlinear_max_iterations=20, + linear_rtol=1.0e-3, + linear_atol=1.0, + linear_restart=10, + linear_max_restarts=0, + preconditioner_update_interval=300, + ) + nl_solver_input = Dict_to_NamedTuple(nl_solver_section) + + coord_sizes = Tuple(isa(c, coordinate) ? c.n : c for c ∈ coords) + total_size_coords = prod(coord_sizes) + outer_coord_sizes = Tuple(isa(c, coordinate) ? c.n : c for c ∈ outer_coords) + + linear_restart = nl_solver_input.linear_restart + + if serial_solve + H = allocate_float(linear_restart + 1, linear_restart) + V = allocate_float(reverse(coord_sizes)..., linear_restart+1) + H .= 0.0 + V .= 0.0 + else + H = allocate_shared_float(linear_restart + 1, linear_restart) + V = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1) + + begin_serial_region() + @serial_region begin + H .= 0.0 + V .= 0.0 + end + end + + if preconditioner_type == "lu" + # Create dummy LU solver objects so we can create an array for preconditioners. + # These will be calculated properly within the time loop. 
+ preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)), + reverse(outer_coord_sizes)) + else + preconditioners = nothing + end + + linear_initial_guess = zeros(linear_restart) + + return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol, + nl_solver_input.nonlinear_max_iterations, + nl_solver_input.linear_rtol, nl_solver_input.linear_atol, + linear_restart, nl_solver_input.linear_max_restarts, H, V, + linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), + Ref(0), Ref(0), serial_solve, Ref(0), + nl_solver_input.preconditioner_update_interval, preconditioners) +end + +""" + reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) + +Reset the counters that hold per-step totals or maximums in `nl_solver_params`. +""" +function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) + if nl_solver_params === nothing + return nothing + end + + nl_solver_params.max_nonlinear_iterations_this_step[] = 0 + + return nothing +end + +""" + gather_nonlinear_solver_counters!(nl_solver_params) + +Where necessary, gather the iteration counters for the nonlinear solvers. + +Where each solve runs in parallel using all processes, this is unnecessary as the count on +each process already represents the global count. Where each solve uses only a subset of +processes, the counters from different solves need to be added together to get the global +total. 
+""" +function gather_nonlinear_solver_counters!(nl_solver_params) + if nl_solver_params.ion_advance !== nothing + # Solve runs in parallel on all processes, so no need to collect here + nl_solver_params.ion_advance.global_n_solves[] = nl_solver_params.ion_advance.n_solves[] + nl_solver_params.ion_advance.global_nonlinear_iterations[] = nl_solver_params.ion_advance.nonlinear_iterations[] + nl_solver_params.ion_advance.global_linear_iterations[] = nl_solver_params.ion_advance.linear_iterations[] + end + if nl_solver_params.vpa_advection !== nothing + # Solves are run in serial on separate processes, so need a global Allreduce + nl_solver_params.vpa_advection.global_n_solves[] = MPI.Allreduce(nl_solver_params.vpa_advection.n_solves[], +, comm_world) + nl_solver_params.vpa_advection.global_nonlinear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world) + nl_solver_params.vpa_advection.global_linear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world) + end +end + +""" + newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; + left_preconditioner=nothing, right_preconditioner=nothing, coords) + +`x` is the initial guess at the solution, and is overwritten by the result of the Newton +solve. + +`residual_func!(residual, x)` is the function we are trying to find a root of. It calculates +```math +\\mathtt{residual} = F(\\mathtt{x}) +``` +where we are trying to solve \$F(x)=0\$. + +`residual`, `delta_x`, `rhs_delta`, `v` and `w` are buffer arrays, with the same size as `x`, +used internally. + +`left_preconditioner` or `right_preconditioner` apply preconditioning. They should be +passed a function that solves \$P.x = b\$ where \$P\$ is the preconditioner matrix, \$b\$ +is given by the values passed to the function as the argument, and the result \$x\$ is +returned by overwriting the argument. 
+ +`coords` is a NamedTuple containing the `coordinate` structs corresponding to each +dimension in `x`. + + +Tolerances +---------- + +Note that the meaning of the relative tolerance `rtol` and absolute tolerance `atol` is +very different for the outer Newton iteration and the inner GMRES iteration. + +For the outer Newton iteration the residual \$R(x^n)\$ measures the departure of the +system from the solution (at each grid point). Its size can be compared to the size of the +solution `x`, so it makes sense to define an `error norm' for \$R(x^n)\$ as +```math +E(x^n) = \\left\\lVert \\frac{R(x^n)}{\\mathtt{rtol} |x^n| + \\mathtt{atol}} \\right\\rVert_2 +``` +where \$\\left\\lVert \\cdot \\right\\rVert_2\$ is the 'L2 norm' (square-root of sum of +squares). We can further try to define a grid-size independent error norm by dividing out +the number of grid points to get a root-mean-square (RMS) error rather than an L2 norm. +```math +E_{\\mathrm{RMS}}(x^n) = \\sqrt{ \\frac{1}{N} \\sum_i \\left( \\frac{R(x^n)_i}{\\mathtt{rtol} |x^n_i| + \\mathtt{atol}} \\right)^2 } +``` +where \$N\$ is the total number of grid points. + +In contrast, GMRES is constructed to minimise the L2 norm of \$r_k = b - A\\cdot x_k\$ +where GMRES is solving the linear system \$A\\cdot x = b\$, \$x_k\$ is the approximation +to the solution \$x\$ at the \$k\$'th iteration and \$r_k\$ is the residual at the +\$k\$'th iteration. There is no flexibility to measure error relative to \$x\$ in any +sense. For GMRES, a `relative tolerance' is relative to the norm of the initial residual +\$r_0\$, which is equal to the right-hand-side \$b\$ when no initial guess is given (i.e. +\$x_0 = 0\$). [Where a non-zero initial guess is given it might be better to use a different +stopping criterion, see Carson et al. section 3.8.]. 
The stopping criterion for the GMRES +iteration is therefore +``` +\\left\\lVert r_k \\right\\rVert < \\max(\\mathtt{linear\\_rtol} \\left\\lVert r_0 \\right\\rVert, \\mathtt{linear\\_atol}) = \\max(\\mathtt{linear\\_rtol} \\left\\lVert b \\right\\rVert, \\mathtt{linear\\_atol}) +``` +As the GMRES solve is only used to get the right `direction' for the next Newton step, it +is not necessary to have a very tight `linear_rtol` for the GMRES solve. +""" +function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, + nl_solver_params; left_preconditioner=nothing, + right_preconditioner=nothing, coords) + + rtol = nl_solver_params.rtol + atol = nl_solver_params.atol + + distributed_norm = get_distributed_norm(coords, rtol, atol, x) + distributed_dot = get_distributed_dot(coords, rtol, atol, x) + parallel_map = get_parallel_map(coords) + parallel_delta_x_calc = get_parallel_delta_x_calc(coords) + + residual_func!(residual, x) + residual_norm = distributed_norm(residual) + counter = 0 + linear_counter = 0 + + parallel_map(()->0.0, delta_x) + + close_counter = -1 + close_linear_counter = -1 + success = true + previous_residual_norm = residual_norm + while (counter < 1 && residual_norm > 1.0e-8) || residual_norm > 1.0 + counter += 1 + #println("\nNewton ", counter) + + if left_preconditioner === nothing + left_preconditioner = identity + end + if right_preconditioner === nothing + right_preconditioner = identity + end + + # Solve (approximately?): + # J δx = -RHS(x) + parallel_map(()->0.0, delta_x) + linear_its = linear_solve!(x, residual_func!, residual, delta_x, v, w; + coords=coords, rtol=nl_solver_params.linear_rtol, + atol=nl_solver_params.linear_atol, + restart=nl_solver_params.linear_restart, + max_restarts=nl_solver_params.linear_max_restarts, + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner, + H=nl_solver_params.H, V=nl_solver_params.V, + rhs_delta=rhs_delta, + 
initial_guess=nl_solver_params.linear_initial_guess, + distributed_norm=distributed_norm, + distributed_dot=distributed_dot, + parallel_map=parallel_map, + parallel_delta_x_calc=parallel_delta_x_calc, + serial_solve=nl_solver_params.serial_solve) + linear_counter += linear_its + + # If the residual does not decrease, we will do a line search to find an update + # that does decrease the residual. The value of `x` is used to define the + # normalisation value with rtol that is used to calculate the residual, so do not + # want to update it until the line search is completed (otherwise the norm changes + # during the line search, which might make it fail to converge). So calculate the + # updated value in the buffer `w` until the line search is completed, and only + # then copy it into `x`. + parallel_map((x) -> x, w, x) + parallel_map((x,delta_x) -> x + delta_x, w, x, delta_x) + residual_func!(residual, w) + + # For the Newton iteration, we want the norm divided by the (sqrt of the) number + # of grid points, so we can use a tolerance that is independent of the size of the + # grid. This is unlike the norms needed in `linear_solve!()`. 
+ residual_norm = distributed_norm(residual) + if isnan(residual_norm) + error("NaN in Newton iteration at iteration $counter") + end + if residual_norm > previous_residual_norm + # Do a line search between x and x+delta_x to try to find an update that does + # decrease residual_norm. The trial point is built in the buffer `w` (`x` is + # not updated until the search has finished), so the residual has to be + # evaluated at `w`, not at `x`. + s = 0.5 + while s > 1.0e-2 + parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + residual_func!(residual, w) + residual_norm = distributed_norm(residual) + if residual_norm ≤ previous_residual_norm + break + end + s *= 0.5 + end + + if residual_norm > previous_residual_norm + # Failed to find a point that decreases the residual, so try a negative + # step + s = -1.0e-5 + parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + residual_func!(residual, w) + residual_norm = distributed_norm(residual) + if residual_norm > previous_residual_norm + # That didn't work either, so just take the full step (s = 1) and hope + # for convergence later + parallel_map((x,delta_x) -> x + delta_x, w, x, delta_x) + residual_func!(residual, w) + residual_norm = distributed_norm(residual) + end + end + end + parallel_map((w) -> w, x, w) + previous_residual_norm = residual_norm + + #println("Newton residual ", residual_norm, " ", linear_its, " $rtol $atol") + + if residual_norm < 0.1/rtol && close_counter < 0 && close_linear_counter < 0 + close_counter = counter + close_linear_counter = linear_counter + end + + if counter > nl_solver_params.nonlinear_max_iterations + println("maximum iteration limit reached") + success = false + break + end + end + nl_solver_params.n_solves[] += 1 + nl_solver_params.nonlinear_iterations[] += counter + nl_solver_params.linear_iterations[] += linear_counter + nl_solver_params.max_nonlinear_iterations_this_step[] = + max(counter, nl_solver_params.max_nonlinear_iterations_this_step[]) +# println("Newton iterations: ", counter) +# println("Final residual: ", residual_norm) +# println("Total linear iterations: ", linear_counter) +# println("Linear iterations per
Newton: ", linear_counter / counter) +# +# println("Newton iterations after close: ", counter - close_counter) +# println("Total linear iterations after close: ", linear_counter - close_linear_counter) +# println("Linear iterations per Newton after close: ", (linear_counter - close_linear_counter) / (counter - close_counter)) +# println() + + return success +end + +""" + get_distributed_norm(coords, rtol, atol, x) + +Get a 'distributed_norm' function that acts on arrays with dimensions given by the +entries in `coords`. +""" +function get_distributed_norm(coords, rtol, atol, x) + dims = keys(coords) + if dims == (:z,) + this_norm = distributed_norm_z + elseif dims == (:vpa,) + this_norm = distributed_norm_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + this_norm = distributed_norm_s_r_z_vperp_vpa + else + error("dims=$dims is not supported yet. Need to write another " + * "`distributed_norm_*()` function in nonlinear_solvers.jl") + end + + wrapped_norm = (args...; kwargs...) -> this_norm(args...; rtol=rtol, atol=atol, x=x, + coords=coords, kwargs...) 
+ + return wrapped_norm +end + +function distributed_norm_z(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) + z = coords.z + + begin_z_region() + + local_norm = 0.0 + if z.irank < z.nrank - 1 + zend = z.n + @loop_z iz begin + if iz == zend + continue + end + local_norm += (residual[iz] / (rtol * abs(x[iz]) + atol))^2 + end + else + @loop_z iz begin + local_norm += (residual[iz] / (rtol * abs(x[iz]) + atol))^2 + end + end + + _block_synchronize() + block_norm = MPI.Reduce(local_norm, +, comm_block[]) + + if block_rank[] == 0 + global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) + global_norm = sqrt(global_norm / z.n_global) + else + global_norm = nothing + end + global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + + return global_norm +end + +function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. 
+ residual_norm = 0.0 + for i ∈ eachindex(residual, x) + residual_norm += (residual[i] / (rtol * abs(x[i]) + atol))^2 + end + + residual_norm = sqrt(residual_norm / length(residual)) + + return residual_norm +end + +function distributed_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}; + coords, rtol, atol, x) + n_ion_species = coords.s + r = coords.r + z = coords.z + vperp = coords.vperp + vpa = coords.vpa + + begin_s_r_z_vperp_vpa_region() + + local_norm = 0.0 + if r.irank < r.nrank - 1 + rend = r.n + else + rend = r.n + 1 + end + if z.irank < z.nrank - 1 + zend = z.n + else + zend = z.n + 1 + end + @loop_s_r_z is ir iz begin + if ir == rend || iz == zend + continue + end + @loop_vperp_vpa ivperp ivpa begin + local_norm += (residual[ivpa,ivperp,iz,ir,is] / (rtol * abs(x[ivpa,ivperp,iz,ir,is]) + atol))^2 + end + end + + _block_synchronize() + block_norm = MPI.Reduce(local_norm, +, comm_block[]) + + if block_rank[] == 0 + global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) + global_norm = sqrt(global_norm / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global)) + else + global_norm = nothing + end + global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + + return global_norm +end + +""" + get_distributed_dot(coords, rtol, atol, x) + +Get a 'distributed_dot' function that acts on arrays with dimensions given by the entries +in `coords`. +""" +function get_distributed_dot(coords, rtol, atol, x) + dims = keys(coords) + if dims == (:z,) + this_dot = distributed_dot_z + elseif dims == (:vpa,) + this_dot = distributed_dot_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + this_dot = distributed_dot_s_r_z_vperp_vpa + else + error("dims=$dims is not supported yet. Need to write another " + * "`distributed_dot_*()` function in nonlinear_solvers.jl") + end + + wrapped_dot = (args...; kwargs...) -> this_dot(args...; rtol=rtol, atol=atol, x=x, + coords=coords, kwargs...) 
+ +end + +function distributed_dot_z(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; + coords, atol, rtol, x) + + z = coords.z + + begin_z_region() + + z = coords.z + + local_dot = 0.0 + if z.irank < z.nrank - 1 + zend = z.n + @loop_z iz begin + if iz == zend + continue + end + local_dot += v[iz] * w[iz] / (rtol * abs(x[iz]) + atol)^2 + end + else + @loop_z iz begin + local_dot += v[iz] * w[iz] / (rtol * abs(x[iz]) + atol)^2 + end + end + + _block_synchronize() + block_dot = MPI.Reduce(local_dot, +, comm_block[]) + + if block_rank[] == 0 + global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) + global_dot = global_dot / z.n_global + else + global_dot = nothing + end + + return global_dot +end + +function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; + coords, atol, rtol, x) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + local_dot = 0.0 + for i ∈ eachindex(v,w) + local_dot += v[i] * w[i] / (rtol * abs(x[i]) + atol)^2 + end + local_dot = local_dot / length(v) + return local_dot +end + +function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5}, + w::AbstractArray{mk_float, 5}; + coords, atol, rtol, x) + n_ion_species = coords.s + r = coords.r + z = coords.z + vperp = coords.vperp + vpa = coords.vpa + + begin_s_r_z_vperp_vpa_region() + + local_dot = 0.0 + if r.irank < r.nrank - 1 + rend = r.n + else + rend = r.n + 1 + end + if z.irank < z.nrank - 1 + zend = z.n + else + zend = z.n + 1 + end + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + if ir == rend || iz == zend + continue + end + local_dot += v[ivpa,ivperp,iz,ir,is] * w[ivpa,ivperp,iz,ir,is] / (rtol * abs(x[ivpa,ivperp,iz,ir,is]) + atol)^2 + end + + _block_synchronize() + block_dot = MPI.Reduce(local_dot, +, comm_block[]) + + if block_rank[] == 0 + global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) + global_dot = global_dot / 
(n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global) + else + global_dot = nothing + end + + return global_dot +end + +""" + get_parallel_map(coords) + +Get a 'parallel_map' function that acts on arrays with dimensions given by the entries in +`coords`. +""" +function get_parallel_map(coords) + dims = keys(coords) + if dims == (:z,) + return parallel_map_z + elseif dims == (:vpa,) + return parallel_map_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + return parallel_map_s_r_z_vperp_vpa + else + error("dims=$dims is not supported yet. Need to write another " + * "`parallel_map_*()` function in nonlinear_solvers.jl") + end +end + +# Separate versions for different numbers of arguments as generator expressions result in +# slow code + +function parallel_map_z(func, result::AbstractArray{mk_float, 1}) + + begin_z_region() + + @loop_z iz begin + result[iz] = func() + end + + return nothing +end +function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1) + + begin_z_region() + + @loop_z iz begin + result[iz] = func(x1[iz]) + end + + return nothing +end +function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1, x2) + + begin_z_region() + + @loop_z iz begin + result[iz] = func(x1[iz], x2[iz]) + end + + return nothing +end + +function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + for i ∈ eachindex(result) + result[i] = func() + end + return nothing +end +function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. 
+ for i ∈ eachindex(result) + result[i] = func(x1[i]) + end + return nothing +end +function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1, x2) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i]) + end + return nothing +end + +function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}) + + begin_s_r_z_vperp_vpa_region() + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func() + end + + return nothing +end +function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1) + + begin_s_r_z_vperp_vpa_region() + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is]) + end + + return nothing +end +function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1, x2) + + begin_s_r_z_vperp_vpa_region() + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is]) + end + + return nothing +end + +""" + get_parallel_delta_x_calc(coords) + +Get a parallelised function that calculates the update `delta_x` from the `V` matrix and +the minimum residual coefficients `y`. +""" +function get_parallel_delta_x_calc(coords) + dims = keys(coords) + if dims == (:z,) + return parallel_delta_x_calc_z + elseif dims == (:vpa,) + return parallel_delta_x_calc_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + return parallel_delta_x_calc_s_r_z_vperp_vpa + else + error("dims=$dims is not supported yet. 
Need to write another " + * "`parallel_delta_x_calc_*()` function in nonlinear_solvers.jl") + end +end + +function parallel_delta_x_calc_z(delta_x::AbstractArray{mk_float, 1}, V, y) + + begin_z_region() + + ny = length(y) + @loop_z iz begin + for iy ∈ 1:ny + delta_x[iz] += y[iy] * V[iz,iy] + end + end + + return nothing +end + +function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + ny = length(y) + for ivpa ∈ eachindex(delta_x) + for iy ∈ 1:ny + delta_x[ivpa] += y[iy] * V[ivpa,iy] + end + end + return nothing +end + +function parallel_delta_x_calc_s_r_z_vperp_vpa(delta_x::AbstractArray{mk_float, 5}, V, y) + + begin_s_r_z_vperp_vpa_region() + + ny = length(y) + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + for iy ∈ 1:ny + delta_x[ivpa,ivperp,iz,ir,is] += y[iy] * V[ivpa,ivperp,iz,ir,is,iy] + end + end + + return nothing +end + +""" +Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = R(x^n), which is needed +at each step of the outer Newton iteration (in `newton_solve!()`). +""" +function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol, + restart, max_restarts, left_preconditioner, right_preconditioner, + H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot, + parallel_map, parallel_delta_x_calc, serial_solve) + # Solve (approximately?): + # J δx = residual0 + + Jv_scale_factor = 1.0e3 + inv_Jv_scale_factor = 1.0 / Jv_scale_factor + + # The vectors `v` that are passed to this function will be normalised so that + # `distributed_norm(v) == 1.0`. `distributed_norm()` is defined - including the + # relative and absolute tolerances from the Newton iteration - so that a vector with a + # norm of 1.0 is 'small' in the sense that a vector with a norm of 1.0 is small enough + # relative to `x` to consider the iteration converged. 
This means that `x+v` would be + # very close to `x`, so R(x+v)-R(x) would be likely to be badly affected by rounding + # errors, because `v` is so small, relative to `x`. We actually want to multiply `v` + # by a large number `Jv_scale_factor` (in contrast to the small `epsilon` in the + # 'usual' case where the norm does not include either relative or absolute tolerance) + # to ensure that we get a reasonable estimate of J.v. + function approximate_Jacobian_vector_product!(v) + right_preconditioner(v) + + parallel_map((x,v) -> x + Jv_scale_factor * v, v, x, v) + residual_func!(rhs_delta, v) + parallel_map((rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor, + v, rhs_delta, residual0) + left_preconditioner(v) + return v + end + + # To start with, copy the initial guess delta_x into the buffer 'v', and apply the + # left-preconditioner to residual0. + parallel_map((delta_x) -> delta_x, v, delta_x) + left_preconditioner(residual0) + # This function transforms the data stored in 'v' from δx to ≈J.δx + approximate_Jacobian_vector_product!(v) + # Now we actually set 'w' as the first Krylov vector, and normalise it. + parallel_map((residual0, v) -> -residual0 - v, w, residual0, v) + beta = distributed_norm(w) + parallel_map((w) -> w/beta, selectdim(V,ndims(V),1), w) + + # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is + # so small that it is smaller than atol, in which case use atol instead.
+ tol = max(rtol * beta, atol) + + lsq_result = nothing + residual = Inf + counter = 0 + restart_counter = 1 + while true + for i ∈ 1:restart + counter += 1 + #println("Linear ", counter) + + # Compute next Krylov vector + parallel_map((V) -> V, w, selectdim(V,ndims(V),i)) + approximate_Jacobian_vector_product!(w) + + # Gram-Schmidt orthogonalization + for j ∈ 1:i + parallel_map((V) -> V, v, selectdim(V,ndims(V),j)) + w_dot_Vj = distributed_dot(w, v) + if serial_solve + H[j,i] = w_dot_Vj + else + begin_serial_region() + @serial_region begin + H[j,i] = w_dot_Vj + end + end + parallel_map((w, V) -> w - H[j,i] * V, w, w, selectdim(V,ndims(V),j)) + end + norm_w = distributed_norm(w) + if serial_solve + H[i+1,i] = norm_w + else + begin_serial_region() + @serial_region begin + H[i+1,i] = norm_w + end + end + parallel_map((w) -> w / H[i+1,i], selectdim(V,ndims(V),i+1), w) + + function temporary_residual!(result, guess) + #println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess)) + result .= @view(H[1:i+1,1:i]) * guess + result[1] -= beta + end + + # Second argument to fsolve needs to be a Vector{Float64} + if serial_solve + resize!(initial_guess, i) + initial_guess[1] = beta + initial_guess[2:i] .= 0.0 + lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) + residual = norm(lsq_result.f) + else + begin_serial_region() + if global_rank[] == 0 + resize!(initial_guess, i) + initial_guess[1] = beta + initial_guess[2:i] .= 0.0 + lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) + residual = norm(lsq_result.f) + else + residual = nothing + end + residual = MPI.bcast(residual, comm_world; root=0) + end + if residual < tol + break + end + end + + # Update initial guess for restart + if serial_solve + y = lsq_result.x + else + if global_rank[] == 0 + y = lsq_result.x + else + y = nothing + end + y = MPI.bcast(y, comm_world; root=0) + end + + # The following is the `parallel_map()` version of + # delta_x
.= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y)) + # slightly abusing splatting to get the sum into a lambda-function. + parallel_delta_x_calc(delta_x, V, y) + right_preconditioner(delta_x) + + if residual < tol || restart_counter > max_restarts + break + end + + restart_counter += 1 + + # Store J.delta_x in the variable delta_x, to use it to calculate the new first + # Krylov vector v/beta. + parallel_map((delta_x) -> delta_x, v, delta_x) + approximate_Jacobian_vector_product!(v) + + # Note residual0 has already had the left_preconditioner!() applied to it. + parallel_map((residual0, v) -> -residual0 - v, v, residual0, v) + beta = distributed_norm(v) + for i ∈ 2:length(y) + parallel_map(() -> 0.0, selectdim(V,ndims(V),i)) + end + parallel_map((v) -> v/beta, selectdim(V,ndims(V),1), v) + end + + return counter +end + +end diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 92baf111b..d2767a90f 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -22,6 +22,10 @@ e.g., if f is the function to be updated, then f^{n+1}[stage+1] = rk_coef[1,stage]*f^{n} + rk_coef[2,stage]*f^{n+1}[stage] + rk_coef[3,stage]*(f^{n}+dt*G[f^{n+1}[stage]] """ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operators) + + rk_coefs_implicit = nothing + implicit_coefficient_is_zero = nothing + if type == "RKF5(4)" # Embedded 5th order / 4th order Runge-Kutta-Fehlberg method. 
# Note uses the 5th order solution for the time advance, even though the error @@ -39,7 +43,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1 ; 0 0 0 0 -11//40 34//55 8//11 ; - 0 0 0 0 0 2//55 -1 ] + 0 0 0 0 0 2//55 0 ] n_rk_stages = 6 rk_order = 5 adaptive = true @@ -74,7 +78,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; - 0 0 0 0 0 0 0 0 0 1//10 -1 ] + 0 0 0 0 0 0 0 0 0 1//10 0 ] n_rk_stages = 10 rk_order = 4 adaptive = true @@ -87,22 +91,22 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat elseif type == "Fekete6(4)" # Fekete 6-stage 4th-order SSPRK (see comments in util/calculate_rk_coeffs.jl. # Note Fekete et al. recommend the 10-stage method rather than this one. - #rk_coeffs = mk_float[0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; - # 0.3552975516919 0.4295138541066736 -6.461498003318411e-14 -1.1871059690804486e-13 0.0 2.9254376698872875e-14 -0.18902907903375094 ; - # 0.0 0.33178669836 0.25530138316744333 -3.352873534367973e-14 0.0 0.2059808002676668 0.2504712436879622 ; - # 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522 ; - # 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.986456106503283e-14 1.1993626679930305 ; - # 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745 ; - # 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524 ] + #rk_coefs = mk_float[0.6447024483081 0.23869944753332645 0.5474858792272213 0.3762853856474131 -6.304828384656085e-17 -0.1813232670344333 -1.0017300417984671 ; + # 0.3552975516919 0.4295138541066736 -6.460461358323626e-14 -1.1868936325049587e-13 3.608184516786869e-18 2.9392365006883485e-14 
-0.18902907903375094; + # 0.0 0.33178669836 0.25530138316744333 -3.3545605887402925e-14 -1.0929532856876731e-17 0.20598080026766677 0.2504712436879622 ; + # 0.0 0.0 0.1972127376054 0.3518900216285391 7.036963218665071e-17 0.47926701162417157 -0.939747918037452 ; + # 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.97599117309567e-14 1.1993626679930303 ; + # 0.0 0.0 0.0 0.0 0.4358156542577 0.34165678726956566 -0.5310335716309745 ; + # 0.0 0.0 0.0 0.0 0.0 0.1544186678729 1.2117066988196523 ] # Might as well set to 0 the entries that look like they should be 0 apart from # rounding errors. - rk_coefs = mk_float[0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; - 0.3552975516919 0.4295138541066736 0.0 0.0 0.0 0.0 -0.18902907903375094 ; - 0.0 0.33178669836 0.25530138316744333 0.0 0.0 0.2059808002676668 0.2504712436879622 ; - 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522 ; - 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 0.0 1.1993626679930305 ; - 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745 ; - 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524 ] + rk_coefs = mk_float[0.6447024483081 0.23869944753332645 0.5474858792272213 0.3762853856474131 0.0 -0.1813232670344333 -0.0017300417984673633 ; + 0.3552975516919 0.4295138541066736 0.0 0.0 0.0 0.0 -0.18902907903375094 ; + 0.0 0.33178669836 0.25530138316744333 0.0 0.0 0.20598080026766677 0.2504712436879622 ; + 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.47926701162417157 -0.939747918037452 ; + 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 0.0 1.1993626679930303 ; + 0.0 0.0 0.0 0.0 0.4358156542577 0.34165678726956566 -0.5310335716309745 ; + 0.0 0.0 0.0 0.0 0.0 0.1544186678729 1.2117066988196523 ] n_rk_stages = 6 rk_order = 4 adaptive = true @@ -118,7 +122,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat # the addition of a truncation error 
estimate. rk_coefs = mk_float[1//2 0 2//3 0 -1//2; 0 1//2 1//6 1//2 1 ; - 1//2 1//2 1//6 1//2 -1//2] + 1//2 1//2 1//6 1//2 1//2] n_rk_stages = 4 rk_order = 3 adaptive = true @@ -134,7 +138,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0 ; 0 0 1//3 1//2 3//16; - 0 0 0 1//4 -1//4 ] + 0 0 0 1//4 3//4 ] n_rk_stages = 4 rk_order = 2 adaptive = true @@ -144,6 +148,57 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat else CFL_prefactor = input_CFL_prefactor end + elseif type == "KennedyCarpenterARK437" + # 7-stage 4th-order IMEX scheme from Kennedy & Carpenter 2019 + # (https://doi.org/10.1016/j.apnum.2018.10.007) + rk_coefs = mk_float[1259//2000 5290646302898597//8373961392408000 8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 
-19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000; + 247//1000 
-989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 -103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 
1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000; + 0 2694949928731//7487940209513 8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 
3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000; + 0 0 
-952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 -3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 -2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000; + 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 
-10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; + 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 -1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; + 0 0 0 0 0 11565764226357//8513123442827 -25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; + 0 0 0 0 0 0 247//2000 2441//2470] + rk_coefs_implicit = mk_float[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 
-295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 -1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 
-4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000; + 0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 -205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 
-6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 -5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820; + 0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 
-773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 -1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164; + 0 0 0 247//2000 538260754703221409274894839//225772174112649072819826640 53717436927136847537872396533404737469775216052354000//7927277355078818621920036006972820247636839819935349 -1772830339659539491048538392985299701647037422231103660763419873677962619701225032539957600128007666810800//15721979495129036484463918711394582943772656542660620350548585102102545643714934169988943899054128147359 23300798642481300915550244810330608079712179111290003575082920574543585477524931007510835130377963055801601626441651388811757948939347006283700//721721356793285693541681937242605189550706405472415761047580279465244653002939927725991664966944137182650520997040379092802938837804294847283 
7898475573277855254317959379657908427305225639249224806096111567367259892230833277244398214475361481979103570897581069354516256084225361210253104267762350814757343118967653292590//10295136532244819865134216418581425896312845076778105967882093451812315373150919833368240666957561175670654409595846292949450822030580980616104277250108095432715677497120580742213; + 0 0 0 0 247//2000 109149106916529224225613938235//31652606811075124885796735456 -1471402139169815526549951787477624798736224552941321200//56440086799582050350922834838054057420026494337598863 5412580004078613161687981458637008030233366457113868199883389551037522158772900//455213269145024092215621169612878312797315768577504391565569423807316058315843 3820395190970937465086284368007694267392689261092114936550556797062450867925288189416331303649552143230//14273858093118406255740963176533941671573423886510381570104226394395061792966349825149357548274393367193; + 0 0 0 0 0 247//2000 3702251939282354375344210899//4399061605898178118260737311 7335745460336671146051037364344432719375//4167718263865310401834285721625989602073 11934751738672605596266458500552561610251142174626625//266467933294598164559149808395021381193202692438810102; + 0 0 0 0 0 0 247//2000 1753//2000 0] + implicit_coefficient_is_zero = Bool[true, false, false, false, false, false, false] + n_rk_stages = 7 + rk_order = 4 + adaptive = true + low_storage = false + if input_CFL_prefactor ≤ 0.0 + CFL_prefactor = 4.0 + else + CFL_prefactor = input_CFL_prefactor + end + elseif type == "KennedyCarpenterARK324" + # 4-stage 3th-order IMEX scheme from Kennedy & Carpenter 2003 + # (https://doi.org/10.1016/S0168-9274(02)00138-1, + # https://ntrs.nasa.gov/api/citations/20010075154/downloads/20010075154.pdf) + rk_coefs = mk_float[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 
143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 -674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800; + 1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; + 0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 
-99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; + 0 0 10755448449292//10357097424841 -2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; + 0 0 0 1767732205903//4055673282236 2223734833661311464443869//2412892370833855116699825] + rk_coefs_implicit = mk_float[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 -1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200; + 0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 
-13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425; + 0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225; + 0 0 0 1767732205903//4055673282236 2287941076333//4055673282236 0] + implicit_coefficient_is_zero = Bool[true, false, false, false] + n_rk_stages = 4 + rk_order = 3 + adaptive = true + low_storage = false + if input_CFL_prefactor ≤ 0.0 + CFL_prefactor = 4.0 + else + CFL_prefactor = input_CFL_prefactor + end elseif type == "SSPRK4" n_rk_stages = 4 rk_coefs = allocate_float(3, n_rk_stages) @@ -197,35 +252,71 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat CFL_prefactor = NaN else error("Unsupported RK timestep method, type=$type\n" - * "Valid methods are: SSPRK4, SSPRK3, SSPRK2, SSPRK1, RKF5(4), Fekete10(4)," - * "Fekete6(4), Fekete4(3), Fekete4(2)") + * "Valid methods are: SSPRK4, SSPRK3, SSPRK2, SSPRK1, RKF5(4), Fekete10(4), " + * "Fekete6(4), Fekete4(3), Fekete4(2), KennedyCarpenterARK437, " + * "KennedyCarpenterARK324") end if split_operators && adaptive error("Adaptive timestepping not supported with operator splitting") end - return rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor + # Sanity check size of rk_coefs arrays + if low_storage + correct_size = 
(3, n_rk_stages + adaptive) + if size(rk_coefs) != correct_size + error("Size of rk_coefs, $(size(rk_coefs)) is not " + * "(n_rk_stages+1, n_rk_stages+1)=$correct_size") + end + + correct_size_implicit = (3, n_rk_stages + 1 + adaptive) + if rk_coefs_implicit !== nothing && size(rk_coefs_implicit) != correct_size_implicit + error("Size of rk_coefs_implicit, $(size(rk_coefs_implicit)) is not " + * "(3, n_rk_stages+2)=$correct_size_implicit") + end + else + correct_size = (n_rk_stages + 1, n_rk_stages + adaptive) + if size(rk_coefs) != correct_size + error("Size of rk_coefs, $(size(rk_coefs)) is not " + * "(n_rk_stages+1, n_rk_stages+1)=$correct_size") + end + + correct_size_implicit = (n_rk_stages, n_rk_stages + 1 + adaptive) + if rk_coefs_implicit !== nothing && size(rk_coefs_implicit) != correct_size_implicit + error("Size of rk_coefs_implicit, $(size(rk_coefs_implicit)) is not " + * "(n_rk_stages, n_rk_stages+2)=$correct_size_implicit") + end + end + + correct_size = (n_rk_stages,) + if implicit_coefficient_is_zero !== nothing && + size(implicit_coefficient_is_zero) != correct_size + error("Size of implicit_coefficient_is_zero, $(size(implicit_coefficient_is_zero)) " + * "is not (n_rk_stages,)=$correct_size") + end + + return rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, + rk_order, adaptive, low_storage, CFL_prefactor end """ use Runge Kutta to update any ion velocity moments evolved separately from the pdf """ -function rk_update_evolved_moments!(scratch, moments, t_params, istage) +function rk_update_evolved_moments!(scratch, scratch_implicit, moments, t_params, istage) # if separately evolving the particle density, update using RK if moments.evolve_density - rk_update_variable!(scratch, :density, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :density, t_params, istage) end # if separately evolving the parallel flow, update using RK if moments.evolve_upar - rk_update_variable!(scratch, :upar, t_params, istage) + 
rk_update_variable!(scratch, scratch_implicit, :upar, t_params, istage) end # if separately evolving the parallel pressure, update using RK; if moments.evolve_ppar - rk_update_variable!(scratch, :ppar, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :ppar, t_params, istage) end end @@ -233,30 +324,35 @@ end use Runge Kutta to update any electron velocity moments evolved separately from the pdf """ -function rk_update_evolved_moments_electron!(scratch, moments, t_params, istage) +function rk_update_evolved_moments_electron!(scratch, scratch_implicit, moments, t_params, + istage) # For now, electrons always fully moment kinetic, and ppar is the only evolving moment # (density and upar are calculated from quasineutrality and ambipolarity constraints). - rk_update_variable!(scratch, :ppar_electron, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :ppar_electron, t_params, istage) end """ use Runge Kutta to update any neutral-particle velocity moments evolved separately from the pdf """ -function rk_update_evolved_moments_neutral!(scratch, moments, t_params, istage) +function rk_update_evolved_moments_neutral!(scratch, scratch_implicit, moments, t_params, + istage) # if separately evolving the particle density, update using RK if moments.evolve_density - rk_update_variable!(scratch, :density_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :density_neutral, t_params, istage; + neutrals=true) end # if separately evolving the parallel flow, update using RK if moments.evolve_upar - rk_update_variable!(scratch, :uz_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :uz_neutral, t_params, istage; + neutrals=true) end # if separately evolving the parallel pressure, update using RK; if moments.evolve_ppar - rk_update_variable!(scratch, :pz_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :pz_neutral, t_params, istage; 
+ neutrals=true) end end @@ -264,29 +360,52 @@ end Update the variable named `var_symbol` in `scratch` to the current Runge-Kutta stage `istage`. The current value in `scratch[istage+1]` is the result of the forward-Euler update, which needs to be corrected using values from previous stages with the Runge-Kutta -coefficients. +coefficients. `scratch_implicit` contains the results of backward-Euler updates, which are +needed for IMEX timestepping schemes. """ -function rk_update_variable!(scratch, var_symbol::Symbol, t_params, istage; neutrals=false) +function rk_update_variable!(scratch, scratch_implicit, var_symbol::Symbol, t_params, + istage; neutrals=false) if t_params.low_storage var_arrays = (getfield(scratch[istage+1], var_symbol), getfield(scratch[istage], var_symbol), getfield(scratch[1], var_symbol)) + if scratch_implicit === nothing + var_arrays_implicit = (nothing, nothing, nothing) + else + var_arrays_implicit = (getfield(scratch_implicit[istage+1], var_symbol), + getfield(scratch_implicit[istage], var_symbol), + getfield(scratch_implicit[1], var_symbol)) + end else var_arrays = Tuple(getfield(scratch[i], var_symbol) for i ∈ 1:istage+1) + if scratch_implicit === nothing + var_arrays_implicit = nothing + else + var_arrays_implicit = Tuple(getfield(scratch_implicit[i], var_symbol) + for i ∈ 1:istage) + end end rk_coefs = @view t_params.rk_coefs[:,istage] + if t_params.rk_coefs_implicit === nothing + rk_coefs_implicit = nothing + else + rk_coefs_implicit = @view t_params.rk_coefs_implicit[:,istage+1] + end if neutrals if t_params.low_storage - rk_update_loop_neutrals_low_storage!(rk_coefs, var_arrays...) + rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit, + var_arrays..., var_arrays_implicit...) else - rk_update_loop_neutrals!(rk_coefs, var_arrays) + rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit, var_arrays, + var_arrays_implicit) end else if t_params.low_storage - rk_update_loop_low_storage!(rk_coefs, var_arrays...) 
+ rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, var_arrays..., + var_arrays_implicit...) else - rk_update_loop!(rk_coefs, var_arrays) + rk_update_loop!(rk_coefs, rk_coefs_implicit, var_arrays, var_arrays_implicit) end end @@ -294,44 +413,67 @@ function rk_update_variable!(scratch, var_symbol::Symbol, t_params, istage; neut end """ -Calculate the estimated truncation error for the variable named `var_symbol`, for adaptive -timestepping methods. +Calculate a lower-order approximation for the variable named `var_symbol`, which can be +used to calculate an error estimate for adaptive timestepping methods. -The calculated error is stored in `var_symbol` in `scratch[2]` (as this entry should not -be needed again after the error is calculated). +The lower-order approximation is stored in `var_symbol` in `scratch[2]` (as this entry +should not be needed again after the lower-order approximation is calculated). """ -function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=false) +function rk_loworder_solution!(scratch, scratch_implicit, var_symbol::Symbol, t_params; + neutrals=false) if !t_params.adaptive - error("rk_error_variable!() should only be called when using adaptive " + error("rk_loworder_solution!() should only be called when using adaptive " * "timestepping") end if t_params.low_storage var_arrays = (getfield(scratch[end], var_symbol), getfield(scratch[end-1], var_symbol), getfield(scratch[1], var_symbol)) + if scratch_implicit === nothing + var_arrays_implicit = (nothing, nothing, nothing) + else + var_arrays_implicit = (getfield(scratch_implicit[end], var_symbol), + getfield(scratch_implicit[end-1], var_symbol), + getfield(scratch_implicit[1], var_symbol)) + end else var_arrays = Tuple(getfield(scratch[i], var_symbol) for i ∈ 1:length(scratch)) + if scratch_implicit === nothing + var_arrays_implicit = nothing + else + var_arrays_implicit = Tuple(getfield(scratch_implicit[i], var_symbol) + for i ∈ 1:length(scratch_implicit)) + end
end - error_coefs = @view t_params.rk_coefs[:,end] + loworder_coefs = @view t_params.rk_coefs[:,end] + if t_params.rk_coefs_implicit === nothing + loworder_coefs_implicit = nothing + else + loworder_coefs_implicit = @view t_params.rk_coefs_implicit[:,end] + end # The second element of `scratch` is not needed any more for the RK update, so we can - # overwrite it with the error estimate. + # overwrite it with the lower-order approximation. output = getfield(scratch[2], var_symbol) if neutrals if t_params.low_storage - rk_update_loop_neutrals_low_storage!(error_coefs, var_arrays...; + rk_update_loop_neutrals_low_storage!(loworder_coefs, loworder_coefs_implicit, + var_arrays..., var_arrays_implicit...; output=output) else - rk_update_loop_neutrals!(error_coefs, var_arrays; output=output) + rk_update_loop_neutrals!(loworder_coefs, loworder_coefs_implicit, var_arrays, + var_arrays_implicit; output=output) end else if t_params.low_storage - rk_update_loop_low_storage!(error_coefs, var_arrays...; + rk_update_loop_low_storage!(loworder_coefs, loworder_coefs_implicit, + var_arrays..., var_arrays_implicit...; output=output) else - rk_update_loop!(error_coefs, var_arrays; output=output) + rk_update_loop!(loworder_coefs, loworder_coefs_implicit, var_arrays, + var_arrays_implicit; output=output) end end @@ -339,172 +481,295 @@ function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=fals end # Ion distribution function -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,5}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,5}, old::AbstractArray{mk_float,5}, - first::AbstractArray{mk_float,5}; output=new) + first::AbstractArray{mk_float,5}, new_implicit, + old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_s_r_z_vperp_vpa_region() - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] + - 
rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] + - rk_coefs[3]*new[ivpa,ivperp,iz,ir,is] + if rk_coefs_implicit === nothing + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir,is] + end + else + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir,is] + + rk_coefs_implicit[1]*first_implicit[ivpa,ivperp,iz,ir,is] + + rk_coefs_implicit[2]*old_implicit[ivpa,ivperp,iz,ir,is] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,5}}; - output=var_arrays[N]) where N +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,5}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_s_r_z_vperp_vpa_region() - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - output[ivpa,ivperp,iz,ir,is] = - sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir,is] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N) + end + else + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir,is] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N-1) + end end return nothing end # Ion moments -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,3}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,3}, old::AbstractArray{mk_float,3}, - first::AbstractArray{mk_float,3}; output=new) + first::AbstractArray{mk_float,3}, new_implicit, + 
old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_s_r_z_region() - @loop_s_r_z is ir iz begin - output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] + - rk_coefs[2]*old[iz,ir,is] + - rk_coefs[3]*new[iz,ir,is] + if rk_coefs_implicit === nothing + @loop_s_r_z is ir iz begin + output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] + + rk_coefs[2]*old[iz,ir,is] + + rk_coefs[3]*new[iz,ir,is] + end + else + @loop_s_r_z is ir iz begin + output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] + + rk_coefs[2]*old[iz,ir,is] + + rk_coefs[3]*new[iz,ir,is] + + rk_coefs_implicit[1]*first_implicit[iz,ir,is] + + rk_coefs_implicit[2]*old_implicit[iz,ir,is] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,3}}; - output=var_arrays[N]) where N +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,3}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_s_r_z_region() - @loop_s_r_z is ir iz begin - output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_s_r_z is ir iz begin + output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N) + end + else + @loop_s_r_z is ir iz begin + output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir,is] for i ∈ 1:N-1) + end end return nothing end # Electron distribution function -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,4}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,4}, old::AbstractArray{mk_float,4}, - first::AbstractArray{mk_float,4}; output=new) + first::AbstractArray{mk_float,4}, new_implicit, + old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_r_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - 
output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] + - rk_coefs[2]*old[ivpa,ivperp,iz,ir] + - rk_coefs[3]*new[ivpa,ivperp,iz,ir] + if rk_coefs_implicit === nothing + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir] + end + else + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir] + + rk_coefs_implicit[1]*first_implicit[ivpa,ivperp,iz,ir] + + rk_coefs_implicit[2]*old_implicit[ivpa,ivperp,iz,ir] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,4}}; - output=var_arrays[N]) where N +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,4}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_r_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - output[ivpa,ivperp,iz,ir] = - sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N) + end + else + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivpa,ivperp,iz,ir] + for i ∈ 1:N-1) + end end return nothing end # Electron moments -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,2}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,2}, old::AbstractArray{mk_float,2}, - first::AbstractArray{mk_float,2}; output=new) + first::AbstractArray{mk_float,2}, new_implicit, + old_implicit, 
first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_r_z_region() - @loop_r_z ir iz begin - output[iz,ir] = rk_coefs[1]*first[iz,ir] + - rk_coefs[2]*old[iz,ir] + - rk_coefs[3]*new[iz,ir] + if rk_coefs_implicit === nothing + @loop_r_z ir iz begin + output[iz,ir] = rk_coefs[1]*first[iz,ir] + + rk_coefs[2]*old[iz,ir] + + rk_coefs[3]*new[iz,ir] + end + else + @loop_r_z ir iz begin + output[iz,ir] = rk_coefs[1]*first[iz,ir] + + rk_coefs[2]*old[iz,ir] + + rk_coefs[3]*new[iz,ir] + + rk_coefs_implicit[1]*first_implicit[iz,ir] + + rk_coefs_implicit[2]*old_implicit[iz,ir] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,2}}; +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,2}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_r_z_region() - @loop_r_z ir iz begin - output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_r_z ir iz begin + output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N) + end + else + @loop_r_z ir iz begin + output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir] + for i ∈ 1:N-1) + end end return nothing end # Neutral distribution function -function rk_update_loop_neutrals_low_storage!(rk_coefs, new::AbstractArray{mk_float,6}, - old::AbstractArray{mk_float,6}, - first::AbstractArray{mk_float,6}; output=new) +function rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,6}, + old::AbstractArray{mk_float,6}, + first::AbstractArray{mk_float,6}, + new_implicit, old_implicit, first_implicit; + output=new) @boundscheck length(rk_coefs) == 3 begin_sn_r_z_vzeta_vr_vz_region() - @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin - output[ivz,ivr,ivzeta,iz,ir,isn] = 
rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] + - rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] + - rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn] + if rk_coefs_implicit === nothing + @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn] + end + else + @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs_implicit[1]*first_implicit[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs_implicit[2]*old_implicit[ivz,ivr,ivzeta,iz,ir,isn] + end end return nothing end -function rk_update_loop_neutrals!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,6}}; - output=var_arrays[N]) where N +function rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,6}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_sn_r_z_vzeta_vr_vz_region() - @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin - output[ivz,ivr,ivzeta,iz,ir,isn] = - sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = + sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N) + end + else + @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = + sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivz,ivr,ivzeta,iz,ir,isn] + for i ∈ 1:N-1) + end end return nothing end # Neutral moments -function rk_update_loop_neutrals_low_storage!(rk_coefs, new::AbstractArray{mk_float,3}, +function 
rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,3}, old::AbstractArray{mk_float,3}, - first::AbstractArray{mk_float,3}; + first::AbstractArray{mk_float,3}, + new_implicit, old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_sn_r_z_region() - @loop_sn_r_z isn ir iz begin - output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] + - rk_coefs[2]*old[iz,ir,isn] + - rk_coefs[3]*new[iz,ir,isn] + if rk_coefs_implicit === nothing + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] + + rk_coefs[2]*old[iz,ir,isn] + + rk_coefs[3]*new[iz,ir,isn] + end + else + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] + + rk_coefs[2]*old[iz,ir,isn] + + rk_coefs[3]*new[iz,ir,isn] + + rk_coefs_implicit[1]*first_implicit[iz,ir,isn] + + rk_coefs_implicit[2]*old_implicit[iz,ir,isn] + end end return nothing end -function rk_update_loop_neutrals!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,3}}; - output=var_arrays[N]) where N +function rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,3}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_sn_r_z_region() - @loop_sn_r_z isn ir iz begin - output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N) + end + else + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir,isn] + for i ∈ 1:N-1) + end end return nothing @@ -542,13 +807,13 @@ be known at compile time, allowing this function to be efficient. 
""" function local_error_norm end -function local_error_norm(error::MPISharedArray{mk_float,2}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,2}, f::MPISharedArray{mk_float,2}, rtol, atol; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) if method == "Linf" f_max = -Inf @loop_r_z ir iz begin - error_norm = abs(error[iz,ir]) / (rtol*abs(f[iz,ir]) + atol) + error_norm = abs(f_loworder[iz,ir] - f[iz,ir]) / (rtol*abs(f[iz,ir]) + atol) f_max = max(f_max, error_norm) end return f_max @@ -558,12 +823,12 @@ function local_error_norm(error::MPISharedArray{mk_float,2}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[iz,ir] / (rtol*abs(f[iz,ir]) + atol))^2 + error_norm = ((f_loworder[iz,ir] - f[iz,ir]) / (rtol*abs(f[iz,ir]) + atol))^2 L2sum += error_norm end # Will sum results from different processes in shared memory block after returning # from this function. - nz, nr = size(error) + nz, nr = size(f_loworder) if skip_r_inner nr -= 1 end @@ -575,7 +840,7 @@ function local_error_norm(error::MPISharedArray{mk_float,2}, error("Unrecognized method '$method'") end end -function local_error_norm(error::MPISharedArray{mk_float,3}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,3}, f::MPISharedArray{mk_float,3}, rtol, atol, neutral=false; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) @@ -583,12 +848,12 @@ function local_error_norm(error::MPISharedArray{mk_float,3}, f_max = -Inf if neutral @loop_sn_r_z isn ir iz begin - error_norm = abs(error[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol) + error_norm = abs(f_loworder[iz,ir,isn] - f[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol) f_max = max(f_max, error_norm) end else @loop_s_r_z is ir iz begin - error_norm = abs(error[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol) + error_norm = abs(f_loworder[iz,ir,is] - f[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol) f_max = max(f_max, error_norm) end end @@ -600,7 +865,7 
@@ function local_error_norm(error::MPISharedArray{mk_float,3}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[iz,ir,isn] / (rtol*abs(f[iz,ir,isn]) + atol))^2 + error_norm = ((f_loworder[iz,ir,isn] - f[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol))^2 L2sum += error_norm end else @@ -608,13 +873,13 @@ function local_error_norm(error::MPISharedArray{mk_float,3}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[iz,ir,is] / (rtol*abs(f[iz,ir,is]) + atol))^2 + error_norm = ((f_loworder[iz,ir,is] - f[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol))^2 L2sum += error_norm end end # Will sum results from different processes in shared memory block after returning # from this function. - nz, nr, nspecies = size(error) + nz, nr, nspecies = size(f_loworder) if skip_r_inner nr -= 1 end @@ -626,13 +891,13 @@ function local_error_norm(error::MPISharedArray{mk_float,3}, error("Unrecognized method '$method'") end end -function local_error_norm(error::MPISharedArray{mk_float,5}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,5}, f::MPISharedArray{mk_float,5}, rtol, atol; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) if method == "Linf" f_max = -Inf @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - error_norm = abs(error[ivpa,ivperp,iz,ir,is]) / + error_norm = abs(f_loworder[ivpa,ivperp,iz,ir,is] - f[ivpa,ivperp,iz,ir,is]) / (rtol*abs(f[ivpa,ivperp,iz,ir,is]) + atol) f_max = max(f_max, error_norm) end @@ -643,13 +908,13 @@ function local_error_norm(error::MPISharedArray{mk_float,5}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[ivpa,ivperp,iz,ir,is] / + error_norm = ((f_loworder[ivpa,ivperp,iz,ir,is] - f[ivpa,ivperp,iz,ir,is]) / (rtol*abs(f[ivpa,ivperp,iz,ir,is]) + atol))^2 L2sum += error_norm end # Will sum results from different processes in shared memory block after returning # from this function. 
- nvpa, nvperp, nz, nr, nspecies = size(error) + nvpa, nvperp, nz, nr, nspecies = size(f_loworder) if skip_r_inner nr -= 1 end @@ -661,13 +926,13 @@ function local_error_norm(error::MPISharedArray{mk_float,5}, error("Unrecognized method '$method'") end end -function local_error_norm(error::MPISharedArray{mk_float,6}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,6}, f::MPISharedArray{mk_float,6}, rtol, atol; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) if method == "Linf" f_max = -Inf @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin - error_norm = abs(error[ivz,ivr,ivzeta,iz,ir,isn]) / + error_norm = abs(f_loworder[ivz,ivr,ivzeta,iz,ir,isn] - f[ivz,ivr,ivzeta,iz,ir,isn]) / (rtol*abs(f[ivz,ivr,ivzeta,iz,ir,isn]) + atol) f_max = max(f_max, error_norm) end @@ -678,7 +943,7 @@ function local_error_norm(error::MPISharedArray{mk_float,6}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[ivz,ivr,ivzeta,iz,ir,isn] / + error_norm = ((f_loworder[ivz,ivr,ivzeta,iz,ir,isn] - f[ivz,ivr,ivzeta,iz,ir,isn]) / (rtol*abs(f[ivz,ivr,ivzeta,iz,ir,isn]) + atol))^2 L2sum += error_norm end @@ -692,12 +957,14 @@ end """ adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, - total_points, current_dt, error_norm_method) + total_points, current_dt, error_norm_method, + success, nl_max_its_fraction) Use the calculated `CFL_limits` and `error_norms` to update the timestep in `t_params`.
""" function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms, - total_points, current_dt, error_norm_method) + total_points, current_dt, error_norm_method, + success, nl_max_its_fraction) # Get global minimum of CFL limits CFL_limit = nothing this_limit_caused_by = nothing @@ -706,10 +973,10 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er CFL_limits = MPI.Allreduce(CFL_limits, min, comm_inter_block[]) CFL_limit_caused_by = argmin(CFL_limits) CFL_limit = CFL_limits[CFL_limit_caused_by] - # Reserve first five entries of t_params.limit_caused_by for accuracy, - # max_increase_factor, max_increase_factor_near_fail, minimum_dt and maximum_dt - # limits. - this_limit_caused_by = CFL_limit_caused_by + 5 + # Reserve first five entries of t_params.limit_caused_by for max_increase_factor, + # max_increase_factor_near_fail, minimum_dt, maximum_dt limits and + # high_nl_iterations, then the next `n_variables` for RK accuracy limits.
+ this_limit_caused_by = CFL_limit_caused_by + 5 + t_params.n_variables end if error_norm_method == "Linf" @@ -717,10 +984,12 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er error_norms = MPI.Reduce(error_norms, max, comm_block[]; root=0) error_norm = nothing + max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks error_norms = MPI.Allreduce(error_norms, max, comm_inter_block[]) - error_norm = maximum(error_norms) + max_error_variable_index = argmax(error_norms) + error_norm = error_norms[max_error_variable_index] end error_norm = MPI.bcast(error_norm, 0, comm_block[]) elseif error_norm_method == "L2" @@ -728,6 +997,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er error_norms = MPI.Reduce(error_norms, +, comm_block[]; root=0) error_norm = nothing + max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks error_norms = MPI.Allreduce(error_norms, +, comm_inter_block[]) @@ -740,6 +1010,9 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er # larger number of points in the distribution functions does not mean that # error on the moments is ignored. error_norm = mean(error_norms) + + # Record which variable had the maximum error + max_error_variable_index = argmax(error_norms) end error_norm = MPI.bcast(error_norm, 0, comm_block[]) @@ -747,10 +1020,54 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er error("Unrecognized error_norm_method '$method'") end - # Use current_dt instead of t_params.dt[] here because we are about to write to - # the shared-memory variable t_params.dt[] below, and we do not want to add an extra - # _block_synchronize() call after reading it here. 
- if error_norm > 1.0 && current_dt > t_params.minimum_dt + just_completed_output_step = false + + if !success + # Iteration failed in implicit part of timestep, try decreasing timestep + + # Set scratch[end] equal to scratch[1] to start the timestep over + scratch_temp = scratch[t_params.n_rk_stages+1] + scratch[t_params.n_rk_stages+1] = scratch[1] + scratch[1] = scratch_temp + + @serial_region begin + t_params.failure_counter[] += 1 + + if t_params.previous_dt[] > 0.0 + # If previous_dt=0, the previous step was also a failure so only update + # dt_before_last_fail when previous_dt>0 + t_params.dt_before_last_fail[] = t_params.previous_dt[] + end + + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_output[] = false + + # Decrease timestep by 1/2 - this factor should probably be settable! + # Note when nonlinear solve iteration fails, we do not enforce + # minimum_dt, as the timesolver must error if we do not decrease dt. + if t_params.dt[] > t_params.minimum_dt + # ...but try decreasing just to minimum_dt first, if the dt is still + # bigger than this. + t_params.dt[] = max(t_params.dt[] / 2.0, t_params.minimum_dt) + else + t_params.dt[] = t_params.dt[] / 2.0 + end + + # Don't update the simulation time, as this step failed + t_params.previous_dt[] = 0.0 + + # Count this failure in the last entry of failure_caused_by (the nonlinear + # iteration failure counter), not against the variable with the biggest error norm + t_params.failure_caused_by[end] += 1 + end + elseif (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt * (1.0 + 1.0e-13) + # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when + # t+dt=next_output_time. + # Use current_dt instead of t_params.dt[] here because we are about to write to + # the shared-memory variable t_params.dt[] below, and we do not want to add an + # extra _block_synchronize() call after reading it here.
+ # # Timestep failed, reduce timestep and re-try # Set scratch[end] equal to scratch[1] to start the timestep over @@ -777,20 +1094,11 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er t_params.dt[] * t_params.step_update_prefactor * error_norm^(-1.0/t_params.rk_order)) t_params.dt[] = max(t_params.dt[], t_params.minimum_dt) - minimum_dt = 1.e-14 - if t_params.dt[] < minimum_dt - println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " - * "$minimum_dt at t=$t. Ending run.") - # Set dt negative to signal an error - t_params.dt[] = -1.0 - end - # Don't update the simulation time, as this step failed t_params.previous_dt[] = 0.0 # Call the 'cause' of the timestep failure the variable that has the biggest # error norm here - max_error_variable_index = argmax(error_norms) t_params.failure_caused_by[max_error_variable_index] += 1 #println("t=$t, timestep failed, error_norm=$error_norm, error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) @@ -810,6 +1118,8 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er if t_params.dt[] > CFL_limit t_params.dt[] = CFL_limit end + + just_completed_output_step = true else # Adjust timestep according to Fehlberg's suggestion # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method). @@ -821,12 +1131,15 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er if t_params.dt[] > CFL_limit t_params.dt[] = CFL_limit else - this_limit_caused_by = 1 + # Reserve first five entries of t_params.limit_caused_by for + # max_increase_factor, max_increase_factor_near_fail, minimum_dt and + # maximum_dt limits, high_nl_iterations. + this_limit_caused_by = 5 + max_error_variable_index end # Limit so timestep cannot increase by a large factor, which might lead to # numerical instability in some cases.
- max_cap_limit_caused_by = 2 + max_cap_limit_caused_by = 1 if isinf(t_params.max_increase_factor_near_last_fail) # Not using special timestep limiting near last failed dt value max_cap = t_params.max_increase_factor * t_params.previous_dt[] @@ -843,7 +1156,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er max_cap = max(slow_increase_threshold, t_params.max_increase_factor_near_last_fail * t_params.previous_dt[]) - max_cap_limit_caused_by = 3 + max_cap_limit_caused_by = 2 end end if t_params.dt[] > max_cap @@ -854,13 +1167,24 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er # Prevent timestep from going below minimum_dt if t_params.dt[] < t_params.minimum_dt t_params.dt[] = t_params.minimum_dt - this_limit_caused_by = 4 + this_limit_caused_by = 3 end # Prevent timestep from going above maximum_dt if t_params.dt[] > t_params.maximum_dt t_params.dt[] = t_params.maximum_dt - this_limit_caused_by = 5 + this_limit_caused_by = 4 + end + + if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0 + # The last step took many nonlinear iterations, so do not allow the + # timestep to increase. + # If t_params.previous_dt[]==0.0, then the previous step failed so + # timestep will not be increasing, so do not need this check. + if t_params.dt[] > t_params.previous_dt[] + t_params.dt[] = t_params.previous_dt[] + this_limit_caused_by = 5 + end end t_params.limit_caused_by[this_limit_caused_by] += 1 @@ -875,9 +1199,20 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er end @serial_region begin - if t + t_params.dt[] >= t_params.next_output_time[] + minimum_dt = 1.e-14 + if t_params.dt[] < minimum_dt + println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " + * "$minimum_dt at t=$t. 
Ending run.") + # Set dt negative to signal an error + t_params.dt[] = -1.0 + end + + current_time = t + t_params.previous_dt[] + if (!t_params.write_after_fixed_step_count && !just_completed_output_step + && (current_time + t_params.dt[] >= t_params.next_output_time[])) + t_params.dt_before_output[] = t_params.dt[] - t_params.dt[] = t_params.next_output_time[] - t + t_params.dt[] = t_params.next_output_time[] - current_time t_params.step_to_output[] = true end end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 27eec4a0f..2460dba15 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -9,26 +9,27 @@ export setup_dummy_and_buffer_arrays using MPI using ..type_definitions: mk_float, mk_int -using ..array_allocation: allocate_float, allocate_shared_float, allocate_shared_bool +using ..array_allocation: allocate_float, allocate_shared_float, allocate_shared_int, allocate_shared_bool using ..communication using ..communication: _block_synchronize using ..debugging using ..file_io: write_data_to_ascii, write_all_moments_data_to_binary, write_all_dfns_data_to_binary, debug_dump using ..looping using ..moment_kinetics_structs: scratch_pdf -using ..velocity_moments: update_moments!, update_moments_neutral!, reset_moments_status! +using ..velocity_moments: update_moments!, update_moments_neutral!, reset_moments_status!, update_derived_moments!, update_derived_moments_neutral! using ..velocity_moments: update_density!, update_upar!, update_ppar!, update_pperp!, update_qpar!, update_vth! using ..velocity_moments: update_neutral_density!, update_neutral_qz! using ..velocity_moments: update_neutral_uzeta!, update_neutral_uz!, update_neutral_ur! using ..velocity_moments: update_neutral_pzeta!, update_neutral_pz!, update_neutral_pr! using ..velocity_moments: calculate_ion_moment_derivatives!, calculate_neutral_moment_derivatives! -using ..velocity_moments: update_chodura! 
using ..velocity_grid_transforms: vzvrvzeta_to_vpavperp!, vpavperp_to_vzvrvzeta! using ..boundary_conditions: enforce_boundary_conditions! using ..boundary_conditions: enforce_neutral_boundary_conditions! +using ..boundary_conditions: vpagrid_to_dzdt, enforce_v_boundary_condition_local! using ..input_structs using ..moment_constraints: hard_force_moment_constraints!, - hard_force_moment_constraints_neutral! + hard_force_moment_constraints_neutral!, + moment_constraints_on_residual! using ..advection: setup_advection using ..z_advection: update_speed_z!, z_advection! using ..r_advection: update_speed_r!, r_advection! @@ -36,11 +37,17 @@ using ..neutral_r_advection: update_speed_neutral_r!, neutral_advection_r! using ..neutral_z_advection: update_speed_neutral_z!, neutral_advection_z! using ..neutral_vz_advection: update_speed_neutral_vz!, neutral_advection_vz! using ..vperp_advection: update_speed_vperp!, vperp_advection! -using ..vpa_advection: update_speed_vpa!, vpa_advection! -using ..charge_exchange: charge_exchange_collisions_1V!, charge_exchange_collisions_3V! -using ..ionization: ionization_collisions_1V!, ionization_collisions_3V!, constant_ionization_source! +using ..vpa_advection: update_speed_vpa!, vpa_advection!, implicit_vpa_advection! +using ..charge_exchange: ion_charge_exchange_collisions_1V!, + neutral_charge_exchange_collisions_1V!, + ion_charge_exchange_collisions_3V!, + neutral_charge_exchange_collisions_3V! +using ..ionization: ion_ionization_collisions_1V!, neutral_ionization_collisions_1V!, + ion_ionization_collisions_3V!, neutral_ionization_collisions_3V!, + constant_ionization_source! using ..krook_collisions: krook_collisions! using ..external_sources +using ..nonlinear_solvers using ..numerical_dissipation: vpa_boundary_buffer_decay!, vpa_boundary_buffer_diffusion!, vpa_dissipation!, z_dissipation!, r_dissipation!, vperp_dissipation!, @@ -59,7 +66,7 @@ using ..gyroaverages: init_gyro_operators, gyroaverage_pdf! 
using ..manufactured_solns: manufactured_sources using ..advection: advection_info using ..runge_kutta: rk_update_evolved_moments!, rk_update_evolved_moments_neutral!, - rk_update_variable!, rk_error_variable!, + rk_update_variable!, rk_loworder_solution!, setup_runge_kutta_coefficients!, local_error_norm, adaptive_timestep_update_t_params! using ..utils: to_minutes, get_minimum_CFL_z, get_minimum_CFL_vpa, @@ -131,6 +138,13 @@ struct scratch_dummy_arrays # needs to be shared memory buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} + # buffers to hold ion pdf for implicit solves + implicit_buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_3::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_4::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_5::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_6::MPISharedArray{mk_float,5} buffer_vzvrvzetazsn_1::MPISharedArray{mk_float,5} buffer_vzvrvzetazsn_2::MPISharedArray{mk_float,5} @@ -155,6 +169,8 @@ struct scratch_dummy_arrays buffer_vpavperp_2::MPISharedArray{mk_float,2} buffer_vpavperp_3::MPISharedArray{mk_float,2} + int_buffer_rs_1::MPISharedArray{mk_int,2} + int_buffer_rs_2::MPISharedArray{mk_int,2} end struct advect_object_struct @@ -232,12 +248,13 @@ end Create a [`input_structs.time_info`](@ref) struct using the settings in `t_input`. 
""" -function setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_reload, - manufactured_solns_input, io_input) - rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor = - setup_runge_kutta_coefficients!(t_input.type, - t_input.CFL_prefactor, - t_input.split_operators) +function setup_time_info(t_input, n_variables, code_time, dt_reload, + dt_before_last_fail_reload, manufactured_solns_input, io_input) + rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order, + adaptive, low_storage, CFL_prefactor = + setup_runge_kutta_coefficients!(t_input["type"], + t_input["CFL_prefactor"], + t_input["split_operators"]) if !adaptive # No adaptive timestep, want to use the value from the input file even when we are @@ -252,52 +269,69 @@ function setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_relo dt_before_last_fail = allocate_shared_float(1) step_to_output = allocate_shared_bool(1) if block_rank[] == 0 - dt_shared[] = dt_reload === nothing ? t_input.dt : dt_reload - previous_dt_shared[] = dt_reload === nothing ? t_input.dt : dt_reload + dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload + previous_dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload next_output_time[] = 0.0 - dt_before_output[] = dt_reload === nothing ? t_input.dt : dt_reload + dt_before_output[] = dt_reload === nothing ? t_input["dt"] : dt_reload dt_before_last_fail[] = dt_before_last_fail_reload === nothing ? 
Inf : dt_before_last_fail_reload step_to_output[] = false end _block_synchronize() - end_time = code_time + t_input.dt * t_input.nstep + end_time = code_time + t_input["dt"] * t_input["nstep"] epsilon = 1.e-11 - if t_input.nwrite == 0 - moments_output_times = [end_time] - else - moments_output_times = [code_time + i*t_input.dt - for i ∈ t_input.nwrite:t_input.nwrite:t_input.nstep] - end - if moments_output_times[end] < end_time - epsilon - push!(moments_output_times, end_time) - end - if t_input.nwrite_dfns == 0 - dfns_output_times = [end_time] + if adaptive || t_input["write_after_fixed_step_count"] + if t_input["nwrite"] == 0 + moments_output_times = [end_time] + else + moments_output_times = [code_time + i*t_input["dt"] + for i ∈ t_input["nwrite"]:t_input["nwrite"]:t_input["nstep"]] + end + if moments_output_times[end] < end_time - epsilon + push!(moments_output_times, end_time) + end + if t_input["nwrite_dfns"] == 0 + dfns_output_times = [end_time] + else + dfns_output_times = [code_time + i*t_input["dt"] + for i ∈ t_input["nwrite_dfns"]:t_input["nwrite_dfns"]:t_input["nstep"]] + end + if dfns_output_times[end] < end_time - epsilon + push!(dfns_output_times, end_time) + end else - dfns_output_times = [code_time + i*t_input.dt - for i ∈ t_input.nwrite_dfns:t_input.nwrite_dfns:t_input.nstep] + # Use nwrite_moments and nwrite_dfns to determine when to write output + moments_output_times = mk_float[] + dfns_output_times = mk_float[] end - if dfns_output_times[end] < end_time - epsilon - push!(dfns_output_times, end_time) + + if rk_coefs_implicit === nothing + # Not an IMEX scheme, so cannot have any implicit terms + t_input["implicit_ion_advance"] = false + t_input["implicit_vpa_advection"] = false end - if t_input.high_precision_error_sum + if t_input["high_precision_error_sum"] error_sum_zero = Float128(0.0) else error_sum_zero = 0.0 end - return time_info(t_input.nstep, end_time, dt_shared, previous_dt_shared, next_output_time, - dt_before_output, 
dt_before_last_fail, CFL_prefactor, step_to_output, - Ref(0), Ref(0), mk_int[], mk_int[], moments_output_times, - dfns_output_times, t_input.type, rk_coefs, n_rk_stages, rk_order, - adaptive, low_storage, t_input.rtol, t_input.atol, t_input.atol_upar, - t_input.step_update_prefactor, t_input.max_increase_factor, - t_input.max_increase_factor_near_last_fail, - t_input.last_fail_proximity_factor, t_input.minimum_dt, - t_input.maximum_dt, error_sum_zero, t_input.split_operators, - t_input.steady_state_residual, t_input.converged_residual_value, - manufactured_solns_input.use_for_advance, t_input.stopfile_name) + return time_info(n_variables, t_input["nstep"], end_time, dt_shared, previous_dt_shared, + next_output_time, dt_before_output, dt_before_last_fail, + CFL_prefactor, step_to_output, Ref(0), Ref(0), mk_int[], mk_int[], + t_input["nwrite"], t_input["nwrite_dfns"], moments_output_times, + dfns_output_times, t_input["type"], rk_coefs, rk_coefs_implicit, + implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, + low_storage, t_input["rtol"], t_input["atol"], t_input["atol_upar"], + t_input["step_update_prefactor"], t_input["max_increase_factor"], + t_input["max_increase_factor_near_last_fail"], + t_input["last_fail_proximity_factor"], t_input["minimum_dt"], + t_input["maximum_dt"], t_input["implicit_ion_advance"], + t_input["implicit_vpa_advection"], + t_input["write_after_fixed_step_count"], error_sum_zero, + t_input["split_operators"], t_input["steady_state_residual"], + t_input["converged_residual_value"], + manufactured_solns_input.use_for_advance, t_input["stopfile_name"]) end """ @@ -314,58 +348,102 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop dt_before_last_fail_reload, collisions, species, geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, advection_structs, - scratch_dummy, restarting) + scratch_dummy, restarting, input_dict) # define some local variables for 
convenience/tidiness n_ion_species = composition.n_ion_species n_neutral_species = composition.n_neutral_species ion_mom_diss_coeff = num_diss_params.ion.moment_dissipation_coefficient - electron_mom_diss_coeff = num_diss_params.electron.moment_dissipation_coefficient neutral_mom_diss_coeff = num_diss_params.neutral.moment_dissipation_coefficient - t_params = setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_reload, - manufactured_solns_input, io_input) + n_variables = 1 # pdf + if moments.evolve_density + # ion density + n_variables += 1 + end + if moments.evolve_upar + # ion flow + n_variables += 1 + end + if moments.evolve_ppar + # ion pressure + n_variables += 1 + end + if composition.n_neutral_species > 0 + # neutral pdf + n_variables += 1 + if moments.evolve_density + # neutral density + n_variables += 1 + end + if moments.evolve_upar + # neutral flow + n_variables += 1 + end + if moments.evolve_ppar + # neutral pressure + n_variables += 1 + end + end + t_params = setup_time_info(t_input, n_variables, code_time, dt_reload, + dt_before_last_fail_reload, manufactured_solns_input, + io_input) # Make Vectors that count which variable caused timestep limits and timestep failures # the right length. Do this setup even when not using adaptive timestepping, because # it is easier than modifying the file I/O according to whether we are using adaptive # timestepping. # - # Entries for limit by accuracy (which is an average over all variables), - # max_increase_factor, minimum_dt and maximum_dt + # Entries for limit by max_increase_factor, max_increase_factor_near_last_fail, + # minimum_dt, maximum_dt and high_nl_iterations. 
push!(t_params.limit_caused_by, 0, 0, 0, 0, 0) # ion pdf - push!(t_params.limit_caused_by, 0, 0) + push!(t_params.limit_caused_by, 0) # RK accuracy + if !t_params.implicit_ion_advance + push!(t_params.limit_caused_by, 0) # z-advection CFL limit + end + if !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) + push!(t_params.limit_caused_by, 0) # vpa-advection CFL limit + end push!(t_params.failure_caused_by, 0) if moments.evolve_density # ion density + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_upar # ion flow + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_ppar # ion pressure + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if composition.n_neutral_species > 0 # neutral pdf - push!(t_params.limit_caused_by, 0, 0) + push!(t_params.limit_caused_by, 0, 0, 0) # RK accuracy plus 2 CFL limits push!(t_params.failure_caused_by, 0) if moments.evolve_density # neutral density + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_upar # neutral flow + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_ppar # neutral pressure + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end end + if t_params.rk_coefs_implicit !== nothing + push!(t_params.failure_caused_by, 0) # Nonlinear iteration fails to converge + end # create the 'advance' struct to be used in later Euler advance to # indicate which parts of the equations are to be advanced concurrently. 
@@ -375,12 +453,69 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop external_source_settings, num_diss_params, manufactured_solns_input, r, z, vperp, vpa, vzeta, vr, vz) + advance_implicit = + setup_implicit_advance_flags(moments, composition, t_params, collisions, + external_source_settings, num_diss_params, + manufactured_solns_input, r, z, vperp, vpa, vzeta, + vr, vz) + # Check that no flags that shouldn't be are set in both advance and advance_implicit. NOTE(review): `manufactured_solns_test` is assigned from `manufactured_solns_input.use_for_advance` in both setup functions, so this check looks like it would error whenever that is `true`; confirm whether it should also be skipped. + for field ∈ fieldnames(advance_info) + if field ∈ (:r_diffusion, :vpa_diffusion, :vperp_diffusion, :vz_diffusion) + # These are meant to be set in both structs + continue + end + if getfield(advance, field) && getfield(advance_implicit, field) + error("$field is set to `true` in both `advance` and `advance_implicit`") + end + end + + # Set up parameters for Jacobian-free Newton-Krylov solver used for implicit part of + # timesteps. + if t_params.implicit_ion_advance + # Solver for the implicit ion advance: set up over (s, r, z, vperp, vpa) together, and + # (unlike the vpa_advection solver set up below) without `serial_solve=true`. + nl_solver_ion_advance_params = + setup_nonlinear_solve(input_dict, + (s=composition.n_ion_species, r=r, z=z, vperp=vperp, + vpa=vpa), + (); + default_rtol=t_params.rtol / 10.0, + default_atol=t_params.atol / 10.0, + preconditioner_type="lu") + else + nl_solver_ion_advance_params = nothing + end + if t_params.implicit_vpa_advection + # Implicit solve for vpa_advection term should be done in serial, as it will be + # called within a parallelised s_r_z_vperp loop. 
+ nl_solver_vpa_advection_params = + setup_nonlinear_solve(input_dict, (vpa=vpa,), + (composition.n_ion_species, r, z, vperp); + default_rtol=t_params.rtol / 10.0, + default_atol=t_params.atol / 10.0, + serial_solve=true, preconditioner_type="lu") + else + nl_solver_vpa_advection_params = nothing + end + if nl_solver_ion_advance_params !== nothing && + nl_solver_vpa_advection_params !== nothing + error("Cannot use implicit_ion_advance and implicit_vpa_advection at the same " + * "time") + end + nl_solver_params = (ion_advance=nl_solver_ion_advance_params, + vpa_advection=nl_solver_vpa_advection_params,) begin_serial_region() # create an array of structs containing scratch arrays for the pdf and low-order moments # that may be evolved separately via fluid equations - scratch = setup_scratch_arrays(moments, pdf.ion.norm, pdf.neutral.norm, t_params.n_rk_stages) + n_rk_stages = t_params.n_rk_stages + scratch = setup_scratch_arrays(moments, pdf, n_rk_stages + 1) + if t_params.rk_coefs_implicit !== nothing + scratch_implicit = setup_scratch_arrays(moments, pdf, n_rk_stages) + else + scratch_implicit = nothing + end # setup dummy arrays & buffer arrays for z r MPI n_neutral_species_alloc = max(1,composition.n_neutral_species) # create arrays for Fokker-Planck collisions @@ -602,8 +737,9 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # Ensure all processes are synchronized at the end of the setup _block_synchronize() - return moments, spectral_objects, scratch, advance, t_params, fp_arrays, gyroavs, - manufactured_source_list + return moments, spectral_objects, scratch, scratch_implicit, advance, + advance_implicit, t_params, fp_arrays, gyroavs, manufactured_source_list, + nl_solver_params end """ @@ -620,14 +756,19 @@ function setup_advance_flags(moments, composition, t_params, collisions, advance_vperp_advection = false advance_z_advection = false advance_r_advection = false - advance_cx_1V = false - advance_cx = false - 
advance_ionization = false - advance_ionization_1V = false + advance_ion_cx_1V = false + advance_neutral_cx_1V = false + advance_ion_cx = false + advance_neutral_cx = false + advance_ion_ionization = false + advance_neutral_ionization = false + advance_ion_ionization_1V = false + advance_neutral_ionization_1V = false advance_ionization_source = false advance_krook_collisions_ii = false advance_external_source = false - advance_numerical_dissipation = false + advance_ion_numerical_dissipation = false + advance_neutral_numerical_dissipation = false advance_sources = false advance_continuity = false advance_force_balance = false @@ -649,11 +790,13 @@ function setup_advance_flags(moments, composition, t_params, collisions, # otherwise, check to see if the flags need to be set to true if !t_params.split_operators # default for non-split operators is to include both vpa and z advection together - advance_vpa_advection = vpa.n > 1 && z.n > 1 - advance_vperp_advection = vperp.n > 1 && z.n > 1 - advance_z_advection = z.n > 1 - advance_r_advection = r.n > 1 - if collisions.fkpl.nuii > 0.0 && vperp.n > 1 + # If using an IMEX scheme and implicit vpa advection has been requested, then vpa + # advection is not included in the explicit part of the timestep. 
+ advance_vpa_advection = vpa.n > 1 && z.n > 1 && !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) + advance_vperp_advection = vperp.n > 1 && z.n > 1 && !t_params.implicit_ion_advance + advance_z_advection = z.n > 1 && !t_params.implicit_ion_advance + advance_r_advection = r.n > 1 && !t_params.implicit_ion_advance + if collisions.fkpl.nuii > 0.0 && vperp.n > 1 && !t_params.implicit_ion_advance explicit_weakform_fp_collisions = true else explicit_weakform_fp_collisions = false @@ -672,9 +815,11 @@ function setup_advance_flags(moments, composition, t_params, collisions, # account for charge exchange collisions if abs(collisions.charge_exchange) > 0.0 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 - advance_cx_1V = true + advance_ion_cx_1V = !t_params.implicit_ion_advance + advance_neutral_cx_1V = true elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 - advance_cx = true + advance_ion_cx = !t_params.implicit_ion_advance + advance_neutral_cx = true else error("If any perpendicular velocity has length>1 they all must. " * "If all perpendicular velocities have length=1, then vpa and " @@ -687,9 +832,11 @@ function setup_advance_flags(moments, composition, t_params, collisions, # account for ionization collisions if abs(collisions.ionization) > 0.0 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 - advance_ionization_1V = true + advance_ion_ionization_1V = !t_params.implicit_ion_advance + advance_neutral_ionization_1V = true elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 - advance_ionization = true + advance_ion_ionization = !t_params.implicit_ion_advance + advance_neutral_ionization = true else error("If any perpendicular velocity has length>1 they all must. 
" * "If all perpendicular velocities have length=1, then vpa and " @@ -700,20 +847,21 @@ function setup_advance_flags(moments, composition, t_params, collisions, end end # exception for the case where ions are evolved alone but sourced by ionization - if collisions.ionization > 0.0 && collisions.constant_ionization_rate + if collisions.ionization > 0.0 && collisions.constant_ionization_rate && !t_params.implicit_ion_advance advance_ionization_source = true end if collisions.krook.nuii0 > 0.0 - advance_krook_collisions_ii = true + advance_krook_collisions_ii = !t_params.implicit_ion_advance end - advance_external_source = external_source_settings.ion.active + advance_external_source = external_source_settings.ion.active && !t_params.implicit_ion_advance advance_neutral_external_source = external_source_settings.neutral.active - advance_numerical_dissipation = true + advance_ion_numerical_dissipation = !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) + advance_neutral_numerical_dissipation = true # if evolving the density, must advance the continuity equation, # in addition to including sources arising from the use of a modified distribution # function in the kinetic equation if moments.evolve_density - advance_sources = true + advance_sources = !t_params.implicit_ion_advance advance_continuity = true if composition.n_neutral_species > 0 advance_neutral_sources = true @@ -724,7 +872,7 @@ function setup_advance_flags(moments, composition, t_params, collisions, # in addition to including sources arising from the use of a modified distribution # function in the kinetic equation if moments.evolve_upar - advance_sources = true + advance_sources = !t_params.implicit_ion_advance advance_force_balance = true if composition.n_neutral_species > 0 advance_neutral_sources = true @@ -735,7 +883,7 @@ function setup_advance_flags(moments, composition, t_params, collisions, # in addition to including sources arising from the use of a modified distribution # 
function in the kinetic equation if moments.evolve_ppar - advance_sources = true + advance_sources = !t_params.implicit_ion_advance advance_energy = true if composition.n_neutral_species > 0 advance_neutral_sources = true @@ -743,28 +891,157 @@ function setup_advance_flags(moments, composition, t_params, collisions, end end + # *_diffusion flags are set regardless of whether diffusion is included in explicit or + # implicit part of timestep, because they are used for boundary conditions, not to + # control which terms are advanced. + # # flag to determine if a d^2/dr^2 operator is present - r_diffusion = (advance_numerical_dissipation && num_diss_params.ion.r_dissipation_coefficient > 0.0) + r_diffusion = (num_diss_params.ion.r_dissipation_coefficient > 0.0) # flag to determine if a d^2/dvpa^2 operator is present - vpa_diffusion = ((advance_numerical_dissipation && num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) - vperp_diffusion = ((advance_numerical_dissipation && num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) - vz_diffusion = (advance_numerical_dissipation && num_diss_params.neutral.vz_dissipation_coefficient > 0.0) + # When using implicit_vpa_advection, the vpa diffusion is included in the implicit + # step + vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) + vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) + vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0) + end + + manufactured_solns_test = manufactured_solns_input.use_for_advance + + return advance_info(advance_vpa_advection, advance_vperp_advection, advance_z_advection, advance_r_advection, + advance_neutral_z_advection, advance_neutral_r_advection, + advance_neutral_vz_advection, advance_ion_cx, advance_neutral_cx, + advance_ion_cx_1V, 
advance_neutral_cx_1V, advance_ion_ionization, + advance_neutral_ionization, advance_ion_ionization_1V, + advance_neutral_ionization_1V, advance_ionization_source, + advance_krook_collisions_ii, + explicit_weakform_fp_collisions, + advance_external_source, advance_ion_numerical_dissipation, + advance_neutral_numerical_dissipation, advance_sources, + advance_continuity, advance_force_balance, advance_energy, + advance_neutral_external_source, advance_neutral_sources, + advance_neutral_continuity, advance_neutral_force_balance, + advance_neutral_energy, manufactured_solns_test, r_diffusion, + vpa_diffusion, vperp_diffusion, vz_diffusion) +end + +""" +create the 'advance_info' struct to be used in the time advance to +indicate which parts of the equations are to be advanced implicitly (using +`backward_euler!()`). +""" +function setup_implicit_advance_flags(moments, composition, t_params, collisions, + external_source_settings, num_diss_params, + manufactured_solns_input, r, z, vperp, vpa, vzeta, + vr, vz) + # default is that no term is advanced implicitly; flags are switched on below + advance_vpa_advection = false + advance_vperp_advection = false + advance_z_advection = false + advance_r_advection = false + advance_ion_cx_1V = false + advance_neutral_cx_1V = false + advance_ion_cx = false + advance_neutral_cx = false + advance_ion_ionization = false + advance_neutral_ionization = false + advance_ion_ionization_1V = false + advance_neutral_ionization_1V = false + advance_ionization_source = false + advance_krook_collisions_ii = false + advance_external_source = false + advance_ion_numerical_dissipation = false + advance_neutral_numerical_dissipation = false + advance_sources = false + advance_continuity = false + advance_force_balance = false + advance_energy = false + advance_neutral_z_advection = false + advance_neutral_r_advection = false + advance_neutral_vz_advection = false + advance_neutral_external_source = false + advance_neutral_sources = false + advance_neutral_continuity 
= false + advance_neutral_force_balance = false + advance_neutral_energy = false + r_diffusion = false + vpa_diffusion = false + vperp_diffusion = false + vz_diffusion = false + explicit_weakform_fp_collisions = false + if t_params.split_operators + error("Implicit timesteps do not support `t_params.split_operators=true`") end + if t_params.implicit_ion_advance + advance_vpa_advection = vpa.n > 1 && z.n > 1 + advance_vperp_advection = vperp.n > 1 && z.n > 1 + advance_z_advection = z.n > 1 + advance_r_advection = r.n > 1 + if abs(collisions.charge_exchange) > 0.0 + if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 + advance_ion_cx_1V = true + elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 + advance_ion_cx = true + else + error("If any perpendicular velocity has length>1 they all must. " + * "If all perpendicular velocities have length=1, then vpa and " + * "vz should be the same.\n" + * "vperp.n=$(vperp.n), vr.n=$(vr.n), vzeta.n=$(vzeta.n), " + * "vpa.n=$(vpa.n), vz.n=$(vz.n)") + end + end + if abs(collisions.ionization) > 0.0 + if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 + advance_ion_ionization_1V = true + elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 + advance_ion_ionization = true + else + error("If any perpendicular velocity has length>1 they all must. 
" + * "If all perpendicular velocities have length=1, then vpa and " + * "vz should be the same.\n" + * "vperp.n=$(vperp.n), vr.n=$(vr.n), vzeta.n=$(vzeta.n), " + * "vpa.n=$(vpa.n), vz.n=$(vz.n)") + end + end + advance_ionization_source = collisions.ionization > 0.0 && collisions.constant_ionization_rate + advance_krook_collisions_ii = collisions.krook.nuii0 > 0.0 + advance_external_source = external_source_settings.ion.active + advance_ion_numerical_dissipation = true + advance_sources = moments.evolve_density || moments.evolve_upar || moments.evolve_ppar + explicit_weakform_fp_collisions = collisions.fkpl.nuii > 0.0 && vperp.n > 1 + elseif t_params.implicit_vpa_advection + advance_vpa_advection = true + advance_ion_numerical_dissipation = true + end + # *_diffusion flags are set regardless of whether diffusion is included in explicit or + # implicit part of timestep, because they are used for boundary conditions, not to + # control which terms are advanced. + # + # flag to determine if a d^2/dr^2 operator is present + r_diffusion = (num_diss_params.ion.r_dissipation_coefficient > 0.0) + # flag to determine if a d^2/dvpa^2 operator is present + # When using implicit_vpa_advection, the vpa diffusion is included in the implicit + # step + vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) + vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) + vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0) manufactured_solns_test = manufactured_solns_input.use_for_advance return advance_info(advance_vpa_advection, advance_vperp_advection, advance_z_advection, advance_r_advection, advance_neutral_z_advection, advance_neutral_r_advection, - advance_neutral_vz_advection, advance_cx, advance_cx_1V, - advance_ionization, advance_ionization_1V, + advance_neutral_vz_advection, advance_ion_cx, advance_neutral_cx, + 
advance_ion_cx_1V, advance_neutral_cx_1V, advance_ion_ionization, + advance_neutral_ionization, advance_ion_ionization_1V, + advance_neutral_ionization_1V, advance_ionization_source, advance_krook_collisions_ii, explicit_weakform_fp_collisions, - advance_external_source, advance_numerical_dissipation, - advance_sources, advance_continuity, advance_force_balance, - advance_energy, advance_neutral_external_source, - advance_neutral_sources, advance_neutral_continuity, - advance_neutral_force_balance, advance_neutral_energy, - manufactured_solns_test, r_diffusion, vpa_diffusion, vperp_diffusion, vz_diffusion) + advance_external_source, advance_ion_numerical_dissipation, + advance_neutral_numerical_dissipation, advance_sources, + advance_continuity, advance_force_balance, advance_energy, + advance_neutral_external_source, advance_neutral_sources, + advance_neutral_continuity, advance_neutral_force_balance, + advance_neutral_energy, manufactured_solns_test, r_diffusion, + vpa_diffusion, vperp_diffusion, vz_diffusion) end function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies_ion,nspecies_neutral) @@ -827,6 +1104,13 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies buffer_vpavperpzrs_1 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) buffer_vpavperpzrs_2 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + + implicit_buffer_vpavperpzrs_1 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_2 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_3 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_4 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_5 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_6 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) buffer_vzvrvzetazsn_1 = 
allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral) buffer_vzvrvzetazsn_2 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral) @@ -849,6 +1133,9 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies buffer_vpavperp_2 = allocate_shared_float(nvpa,nvperp) buffer_vpavperp_3 = allocate_shared_float(nvpa,nvperp) + int_buffer_rs_1 = allocate_shared_int(nr,nspecies_ion) + int_buffer_rs_2 = allocate_shared_int(nr,nspecies_ion) + return scratch_dummy_arrays(dummy_s,dummy_sr,dummy_vpavperp,dummy_zrs,dummy_zrsn, buffer_z_1,buffer_z_2,buffer_z_3,buffer_z_4, buffer_r_1,buffer_r_2,buffer_r_3,buffer_r_4, @@ -860,10 +1147,12 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies buffer_vpavperpzs_1,buffer_vpavperpzs_2,buffer_vpavperpzs_3,buffer_vpavperpzs_4,buffer_vpavperpzs_5,buffer_vpavperpzs_6, buffer_vpavperprs_1,buffer_vpavperprs_2,buffer_vpavperprs_3,buffer_vpavperprs_4,buffer_vpavperprs_5,buffer_vpavperprs_6, buffer_vpavperpzrs_1,buffer_vpavperpzrs_2, + implicit_buffer_vpavperpzrs_1,implicit_buffer_vpavperpzrs_2,implicit_buffer_vpavperpzrs_3,implicit_buffer_vpavperpzrs_4,implicit_buffer_vpavperpzrs_5,implicit_buffer_vpavperpzrs_6, buffer_vzvrvzetazsn_1,buffer_vzvrvzetazsn_2,buffer_vzvrvzetazsn_3,buffer_vzvrvzetazsn_4,buffer_vzvrvzetazsn_5,buffer_vzvrvzetazsn_6, buffer_vzvrvzetarsn_1,buffer_vzvrvzetarsn_2,buffer_vzvrvzetarsn_3,buffer_vzvrvzetarsn_4,buffer_vzvrvzetarsn_5,buffer_vzvrvzetarsn_6, buffer_vzvrvzetazrsn_1, buffer_vzvrvzetazrsn_2, - buffer_vpavperp_1,buffer_vpavperp_2,buffer_vpavperp_3) + buffer_vpavperp_1,buffer_vpavperp_2,buffer_vpavperp_3, + int_buffer_rs_1,int_buffer_rs_2) end @@ -895,16 +1184,19 @@ end create an array of structs containing scratch arrays for the normalised pdf and low-order moments that may be evolved separately via fluid equations """ -function setup_scratch_arrays(moments, pdf_ion_in, pdf_neutral_in, n_rk_stages) - # create n_rk_stages+1 structs, each of which will 
contain one pdf, - # one density, and one parallel flow array - scratch = Vector{scratch_pdf{5,3,6,3}}(undef, n_rk_stages+1) - pdf_dims = size(pdf_ion_in) +function setup_scratch_arrays(moments, pdf, n) + # will create `n` structs (e.g. n_rk_stages+1 for `scratch`, n_rk_stages for + # `scratch_implicit`), each of which will contain one pdf, density, parallel flow, + # parallel pressure, and perpendicular pressure array for ions, and a pdf, density, + # flow and pressure array for neutrals. The vector that holds the structs is allocated + # uninitialised on the next line and its entries are filled in the loop below. + scratch = Vector{scratch_pdf{5,3,6,3}}(undef, n) + pdf_dims = size(pdf.ion.norm) moment_dims = size(moments.ion.dens) - pdf_neutral_dims = size(pdf_neutral_in) + pdf_neutral_dims = size(pdf.neutral.norm) moment_neutral_dims = size(moments.neutral.dens) # populate each of the structs - for istage ∈ 1:n_rk_stages+1 + for istage ∈ 1:n # Allocate arrays in temporary variables so that we can identify them # by source line when using @debug_shared_array pdf_array = allocate_shared_float(pdf_dims...) 
@@ -925,13 +1217,13 @@ function setup_scratch_arrays(moments, pdf_ion_in, pdf_neutral_in, n_rk_stages) pdf_neutral_array, density_neutral_array, uz_neutral_array, pz_neutral_array) @serial_region begin - scratch[istage].pdf .= pdf_ion_in + scratch[istage].pdf .= pdf.ion.norm scratch[istage].density .= moments.ion.dens scratch[istage].upar .= moments.ion.upar scratch[istage].ppar .= moments.ion.ppar scratch[istage].pperp .= moments.ion.pperp - scratch[istage].pdf_neutral .= pdf_neutral_in + scratch[istage].pdf_neutral .= pdf.neutral.norm scratch[istage].density_neutral .= moments.neutral.dens scratch[istage].uz_neutral .= moments.neutral.uz scratch[istage].pz_neutral .= moments.neutral.pz @@ -948,11 +1240,13 @@ df/dt + δv⋅∂f/∂z = 0, with δv(z,t)=v(z,t)-v₀(z) for prudent choice of v₀, expect δv≪v so that explicit time integrator can be used without severe CFL condition """ -function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, - composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, ascii_io, io_moments, io_dfns) +function time_advance!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, + advect_objects, composition, collisions, geometry, gyroavs, + boundary_distributions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, fp_arrays, + scratch_dummy, manufactured_source_list, ascii_io, io_moments, + io_dfns) @debug_detect_redundant_block_synchronize begin # Only want to check for redundant _block_synchronize() calls during the @@ -986,9 +1280,11 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr moments_output_counter = 1 dfns_output_counter = 1 @serial_region begin - t_params.next_output_time[] = - 
min(t_params.moments_output_times[moments_output_counter], - t_params.dfns_output_times[dfns_output_counter]) + if t_params.adaptive && !t_params.write_after_fixed_step_count + t_params.next_output_time[] = + min(t_params.moments_output_times[moments_output_counter], + t_params.dfns_output_times[dfns_output_counter]) + end end _block_synchronize() @@ -1004,22 +1300,37 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end while true - diagnostic_checks = (t + t_params.dt[] ≥ t_params.moments_output_times[moments_output_counter] - epsilon - || t + t_params.dt[] ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon - || t + t_params.dt[] ≥ t_params.end_time - epsilon) + if t_params.adaptive && !t_params.write_after_fixed_step_count + maybe_write_moments = (t + t_params.dt[] ≥ t_params.moments_output_times[moments_output_counter] - epsilon + || t + t_params.dt[] ≥ t_params.end_time - epsilon) + maybe_write_dfns = (t + t_params.dt[] ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon + || t + t_params.dt[] ≥ t_params.end_time - epsilon) + else + maybe_write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0 + || t_params.step_counter[] >= t_params.nstep) + maybe_write_dfns = (t_params.step_counter[] % t_params.nwrite_dfns == 0 + || t_params.step_counter[] >= t_params.nstep) + end + diagnostic_checks = (maybe_write_moments || maybe_write_dfns) if t_params.split_operators # MRH NOT SUPPORTED - time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, - vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, - advance, t_params.step_counter[]) + time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_params, + vpa, z, vpa_spectral, z_spectral, moments, + fields, vpa_advect, z_advect, composition, + collisions, external_source_settings, + num_diss_params, nl_solver_params, advance, + advance_implicit, 
t_params.step_counter[]) else - time_advance_no_splitting!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, - composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, diagnostic_checks, t_params.step_counter[]) + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vz, + vr, vzeta, vpa, vperp, gyrophase, z, r, moments, + fields, spectral_objects, advect_objects, + composition, collisions, geometry, gyroavs, + boundary_distributions, external_source_settings, + num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, + manufactured_source_list, diagnostic_checks, + t_params.step_counter[]) end # update the time t += t_params.previous_dt[] @@ -1042,7 +1353,18 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr finish_now = true end - if t ≥ t_params.moments_output_times[moments_output_counter] - epsilon + if t_params.adaptive && !t_params.write_after_fixed_step_count + write_moments = (t ≥ t_params.moments_output_times[moments_output_counter] - epsilon + || t ≥ t_params.end_time - epsilon) + write_dfns = (t ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon + || t ≥ t_params.end_time - epsilon) + else + write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0 + || t_params.step_counter[] >= t_params.nstep) + write_dfns = (t_params.step_counter[] % t_params.nwrite_dfns == 0 + || t_params.step_counter[] >= t_params.nstep) + end + if write_moments moments_output_counter += 1 if moments_output_counter ≤ length(t_params.moments_output_times) @serial_region begin @@ -1052,10 +1374,8 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end end write_moments = true - else - write_moments = false end - if t ≥ 
t_params.dfns_output_times[dfns_output_counter] - epsilon + if write_dfns dfns_output_counter += 1 if dfns_output_counter ≤ length(t_params.dfns_output_times) @serial_region begin @@ -1065,14 +1385,9 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end end write_dfns = true - else - write_dfns = false end if write_moments || write_dfns || finish_now - # update the diagnostic chodura condition - update_chodura!(moments,scratch[end].pdf,vpa,vperp,z,r,spectral_objects.r_spectral,composition,geometry,scratch_dummy,advect_objects.z_advect) - # Always synchronise here, regardless of if we changed region or not begin_serial_region(no_synchronize=true) _block_synchronize() @@ -1103,6 +1418,10 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr finish_now = true end + # Do MPI communication to add up counters from different processes, where + # necessary. + gather_nonlinear_solver_counters!(nl_solver_params) + time_for_run = to_minutes(now() - start_time) end # write moments data to file @@ -1132,7 +1451,8 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr write_all_moments_data_to_binary(moments, fields, t, composition.n_ion_species, composition.n_neutral_species, io_moments, - iwrite_moments, time_for_run, t_params, r, z) + iwrite_moments, time_for_run, t_params, + nl_solver_params, r, z) if t_params.steady_state_residual # Calculate some residuals to see how close simulation is to steady state @@ -1215,8 +1535,9 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr write_all_dfns_data_to_binary(pdf, moments, fields, t, composition.n_ion_species, composition.n_neutral_species, io_dfns, - iwrite_dfns, time_for_run, t_params, r, z, - vperp, vpa, vzeta, vr, vz) + iwrite_dfns, time_for_run, t_params, + nl_solver_params, r, z, vperp, vpa, vzeta, vr, + vz) iwrite_dfns += 1 begin_s_r_z_vperp_region() @debug_detect_redundant_block_synchronize begin @@ 
-1245,9 +1566,11 @@ end """ """ -function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, - vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, istep) +function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_params, vpa, + z, vpa_spectral, z_spectral, moments, fields, + vpa_advect, z_advect, composition, collisions, + external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) # define some abbreviated variables for tidiness n_ion_species = composition.n_ion_species @@ -1260,155 +1583,179 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # advance the operator-split 1D advection equation in vpa # vpa-advection only applies for ion species advance.vpa_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) advance.vpa_advection = false # z_advection! 
advances the operator-split 1D advection equation in z # apply z-advection operation to all species (ion and neutral) advance.z_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) advance.z_advection = false # account for charge exchange collisions between ions and neutrals if composition.n_neutral_species > 0 if collisions.charge_exchange > 0.0 - advance.cx_collisions = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + advance.ion_cx_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) - advance.cx_collisions = false + nl_solver_params, advance, advance_implicit, istep) + advance.ion_cx_collisions = false + advance.neutral_cx_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, + vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + advance.neutral_cx_collisions = false end if collisions.ionization > 0.0 - advance.ionization_collisions = true - time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa, + advance.ion_ionization_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) - 
advance.ionization_collisions = false + nl_solver_params, advance, advance_implicit, istep) + advance.ion_ionization_collisions = false + advance.neutral_ionization_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, + z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + advance.neutral_ionization_collisions = false end end if collisions.krook.nuii0 > 0.0 advance.krook_collisions_ii = true - time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, z_SL, vpa_SL, composition, collisions, sources, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.krook_collisions_ii = false end # and add the source terms associated with redefining g = pdf/density or pdf*vth/density # to the kinetic equation if moments.evolve_density || moments.evolve_upar || moments.evolve_ppar advance.source_terms = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.source_terms = false end # use the continuity equation to update the density if moments.evolve_density advance.continuity = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, 
advance_implicit, istep) advance.continuity = false end # use force balance to update the parallel flow if moments.evolve_upar advance.force_balance = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.force_balance = false end # use the energy equation to update the parallel pressure if moments.evolve_ppar advance.energy = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.energy = false end else # use the energy equation to update the parallel pressure if moments.evolve_ppar advance.energy = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.energy = false end # use force balance to update the parallel flow if moments.evolve_upar advance.force_balance = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.force_balance = false end # use 
the continuity equation to update the density if moments.evolve_density advance.continuity = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.continuity = false end # and add the source terms associated with redefining g = pdf/density or pdf*vth/density # to the kinetic equation if moments.evolve_density || moments.evolve_upar || moments.evolve_ppar advance.source_terms = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.source_terms = false end # account for charge exchange collisions between ions and neutrals if composition.n_neutral_species > 0 if collisions.ionization > 0.0 - advance.ionization = true - time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa, + advance.neutral_ionization = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, + z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + advance.neutral_ionization = false + advance.ion_ionization = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) - advance.ionization = false + nl_solver_params, advance, advance_implicit, istep) 
+ advance.ion_ionization = false end if collisions.charge_exchange > 0.0 - advance.cx_collisions = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + advance.neutral_cx_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, + vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + advance.neutral_cx_collisions = false + advance.ion_cx_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) - advance.cx_collisions = false + nl_solver_params, advance, advance_implicit, istep) + advance.ion_cx_collisions = false end end # z_advection! advances the operator-split 1D advection equation in z # apply z-advection operation to all species (ion and neutral) advance.z_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) advance.z_advection = false # advance the operator-split 1D advection equation in vpa # vpa-advection only applies for ion species advance.vpa_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, 
external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) advance.vpa_advection = false end return nothing @@ -1416,217 +1763,193 @@ end """ """ -function time_advance_no_splitting!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, - composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, diagnostic_checks, istep) - - ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, composition, collisions, - geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, - advance, fp_arrays, scratch_dummy, manufactured_source_list, diagnostic_checks, istep) +function time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, + vzeta, vpa, vperp, gyrophase, z, r, moments, fields, + spectral_objects, advect_objects, composition, + collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, + fp_arrays, scratch_dummy, manufactured_source_list, + diagnostic_checks, istep) + + ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, moments, fields, spectral_objects, advect_objects, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + diagnostic_checks, istep) return nothing end """ -use information obtained from the Runge-Kutta stages to compute the updated pdf; -for the quantities (density, upar, ppar, vth, qpar and phi) that are derived -from the 'true', un-modified pdf, either: update them using info from Runge Kutta -stages, if the 
quantities are evolved separately from the modified pdf; -or update them by taking the appropriate velocity moment of the evolved pdf +Use the result of the forward-Euler timestep and the previous Runge-Kutta stages to +compute the updated pdfs, and any evolved moments. """ -function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr, vzeta, - vpa, vperp, z, r, spectral_objects, advect_objects, t, t_params, - istage, composition, collisions, geometry, external_source_settings, - gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments, - istep) +function rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) begin_s_r_z_region() new_scratch = scratch[istage+1] old_scratch = scratch[istage] rk_coefs = t_params.rk_coefs[:,istage] - z_spectral, r_spectral, vpa_spectral, vperp_spectral = spectral_objects.z_spectral, spectral_objects.r_spectral, spectral_objects.vpa_spectral, spectral_objects.vperp_spectral - vzeta_spectral, vr_spectral, vz_spectral = spectral_objects.vzeta_spectral, spectral_objects.vr_spectral, spectral_objects.vz_spectral - vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect - neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect - ## # update the ion distribution and moments ## # here we seem to have duplicate arrays for storing n, u||, p||, etc, but not for vth # 'scratch' is for the multiple stages of time advanced quantities, but 'moments' can be updated directly at each stage - rk_update_variable!(scratch, :pdf, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :pdf, t_params, istage) # use Runge Kutta to update any velocity moments evolved separately from the pdf - rk_update_evolved_moments!(scratch, moments, t_params, istage) - - # Ensure there are no negative values in the 
pdf before applying boundary - # conditions, so that negative deviations do not mess up the integral-constraint - # corrections in the sheath boundary conditions. - force_minimum_pdf_value!(new_scratch.pdf, num_diss_params.ion.force_minimum_pdf_value) - - # Enforce boundary conditions in z and vpa on the distribution function. - # Must be done after Runge Kutta update so that the boundary condition applied to - # the updated pdf is consistent with the updated moments - otherwise different upar - # between 'pdf', 'old_scratch' and 'new_scratch' might mean a point that should be - # set to zero at the sheath boundary according to the final upar has a non-zero - # contribution from one or more of the terms. - # NB: probably need to do the same for the evolved moments - enforce_boundary_conditions!(new_scratch, moments, - boundary_distributions.pdf_rboundary_ion, vpa.bc, z.bc, r.bc, vpa, vperp, z, - r, vpa_spectral, vperp_spectral, - vpa_advect, vperp_advect, z_advect, r_advect, composition, scratch_dummy, - advance.r_diffusion, advance.vpa_diffusion, advance.vperp_diffusion) - - if moments.evolve_density && moments.enforce_conservation - begin_s_r_z_region() - A = moments.ion.constraints_A_coefficient - B = moments.ion.constraints_B_coefficient - C = moments.ion.constraints_C_coefficient - @loop_s_r_z is ir iz begin - (A[iz,ir,is], B[iz,ir,is], C[iz,ir,is]) = - @views hard_force_moment_constraints!(new_scratch.pdf[:,:,iz,ir,is], - moments, vpa) - end - end - - function update_derived_ion_moments_and_derivatives() - # update remaining velocity moments that are calculable from the evolved pdf - # Note these may be needed for the boundary condition on the neutrals, so must be - # calculated before that is applied. Also may be needed to calculate advection speeds - # for for CFL stability limit calculations in adaptive_timestep_update!(). 
- update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, - r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) - - calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z, z_spectral, - num_diss_params.ion.moment_dissipation_coefficient) - end - update_derived_ion_moments_and_derivatives() + rk_update_evolved_moments!(scratch, scratch_implicit, moments, t_params, istage) if composition.n_neutral_species > 0 ## # update the neutral particle distribution and moments ## - rk_update_variable!(scratch, :pdf_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :pdf_neutral, t_params, istage; neutrals=true) # use Runge Kutta to update any velocity moments evolved separately from the pdf - rk_update_evolved_moments_neutral!(scratch, moments, t_params, istage) + rk_update_evolved_moments_neutral!(scratch, scratch_implicit, moments, t_params, istage) + end +end + +""" +Apply boundary conditions and moment constraints to updated pdfs and calculate derived +moments and moment derivatives +""" +function apply_all_bcs_constraints_update_moments!( + this_scratch, moments, fields, boundary_distributions, vz, vr, vzeta, vpa, vperp, + z, r, spectral_objects, advect_objects, composition, geometry, gyroavs, + num_diss_params, advance, scratch_dummy, diagnostic_moments; pdf_bc_constraints=true) + + begin_s_r_z_region() + + z_spectral, r_spectral, vpa_spectral, vperp_spectral = spectral_objects.z_spectral, spectral_objects.r_spectral, spectral_objects.vpa_spectral, spectral_objects.vperp_spectral + vzeta_spectral, vr_spectral, vz_spectral = spectral_objects.vzeta_spectral, spectral_objects.vr_spectral, spectral_objects.vz_spectral + vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect + neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, 
advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect + if pdf_bc_constraints # Ensure there are no negative values in the pdf before applying boundary # conditions, so that negative deviations do not mess up the integral-constraint # corrections in the sheath boundary conditions. - force_minimum_pdf_value_neutral!(new_scratch.pdf_neutral, num_diss_params.neutral.force_minimum_pdf_value) + force_minimum_pdf_value!(this_scratch.pdf, num_diss_params.ion.force_minimum_pdf_value) # Enforce boundary conditions in z and vpa on the distribution function. - # Must be done after Runge Kutta update so that the boundary condition applied to - # the updated pdf is consistent with the updated moments - otherwise different upar - # between 'pdf', 'old_scratch' and 'new_scratch' might mean a point that should be - # set to zero at the sheath boundary according to the final upar has a non-zero - # contribution from one or more of the terms. - # NB: probably need to do the same for the evolved moments - # Note, so far vr and vzeta do not need advect objects, so pass `nothing` for - # those as a placeholder - enforce_neutral_boundary_conditions!(new_scratch.pdf_neutral, new_scratch.pdf, - boundary_distributions, new_scratch.density_neutral, new_scratch.uz_neutral, - new_scratch.pz_neutral, moments, new_scratch.density, new_scratch.upar, - fields.Er, vzeta_spectral, vr_spectral, vz_spectral, neutral_r_advect, - neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr, vz, - composition, geometry, scratch_dummy, advance.r_diffusion, - advance.vz_diffusion) + # Must be done after Runge Kutta update so that the boundary condition applied to the + # updated pdf is consistent with the updated moments - otherwise different upar + # between 'pdf', 'scratch[istage]' and 'scratch[istage+1]' might mean a point that + # should be set to zero at the sheath boundary according to the final upar has a + # non-zero contribution from one or more of the terms. 
NB: probably need to do the + # same for the evolved moments + enforce_boundary_conditions!(this_scratch, moments, + boundary_distributions.pdf_rboundary_ion, vpa.bc, z.bc, r.bc, vpa, vperp, z, r, + vpa_spectral, vperp_spectral, vpa_advect, vperp_advect, z_advect, r_advect, + composition, scratch_dummy, advance.r_diffusion, advance.vpa_diffusion, + advance.vperp_diffusion) if moments.evolve_density && moments.enforce_conservation - begin_sn_r_z_region() - A = moments.neutral.constraints_A_coefficient - B = moments.neutral.constraints_B_coefficient - C = moments.neutral.constraints_C_coefficient - @loop_sn_r_z isn ir iz begin - (A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn]) = - @views hard_force_moment_constraints_neutral!( - new_scratch.pdf_neutral[:,:,:,iz,ir,isn], moments, vz) + begin_s_r_z_region() + A = moments.ion.constraints_A_coefficient + B = moments.ion.constraints_B_coefficient + C = moments.ion.constraints_C_coefficient + @loop_s_r_z is ir iz begin + (A[iz,ir,is], B[iz,ir,is], C[iz,ir,is]) = + @views hard_force_moment_constraints!(this_scratch.pdf[:,:,iz,ir,is], + moments, vpa) end end + end - function update_derived_neutral_moments_and_derivatives() - # update remaining velocity moments that are calculable from the evolved pdf - update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r, - composition) - # update the thermal speed - begin_sn_r_z_region() - @loop_sn_r_z isn ir iz begin - moments.neutral.vth[iz,ir,isn] = sqrt(2.0*new_scratch.pz_neutral[iz,ir,isn]/new_scratch.density_neutral[iz,ir,isn]) - end - - # update the parallel heat flux - update_neutral_qz!(moments.neutral.qz, moments.neutral.qz_updated, - new_scratch.density_neutral, new_scratch.uz_neutral, - moments.neutral.vth, new_scratch.pdf_neutral, vz, vr, vzeta, z, - r, composition, moments.evolve_density, moments.evolve_upar, - moments.evolve_ppar) + # update remaining velocity moments that are calculable from the evolved pdf + # Note these may be needed for the boundary 
condition on the neutrals, so must be + # calculated before that is applied. Also may be needed to calculate advection speeds + # for for CFL stability limit calculations in adaptive_timestep_update!(). + update_derived_moments!(this_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) - calculate_neutral_moment_derivatives!(moments, new_scratch, scratch_dummy, z, - z_spectral, - num_diss_params.neutral.moment_dissipation_coefficient) - end - update_derived_neutral_moments_and_derivatives() - end + calculate_ion_moment_derivatives!(moments, this_scratch, scratch_dummy, z, z_spectral, + num_diss_params.ion.moment_dissipation_coefficient) # update the electrostatic potential phi - update_phi!(fields, scratch[istage+1], vperp, z, r, composition, z_spectral, - r_spectral, scratch_dummy, gyroavs) - # _block_synchronize() here because phi needs to be read on different ranks than - # it was written on, even though the loop-type does not change here. However, - # after the final RK stage can skip if: - # * evolving upar or ppar as synchronization will be triggered after moments - # updates at the beginning of the next RK step - _block_synchronize() + update_phi!(fields, this_scratch, vperp, z, r, composition, z_spectral, r_spectral, + scratch_dummy, gyroavs) - if t_params.adaptive && istage == t_params.n_rk_stages - # Note the timestep update must be done before calculating derived moments and - # moment derivatives, because the timstep might need to be re-done with a smaller - # dt, in which case scratch[t_params.n_rk_stages+1] will be reset to the values - # from the beginning of the timestep here. 
- adaptive_timestep_update!(scratch, t, t_params, moments, fields, composition, - collisions, geometry, external_source_settings, - advect_objects, r, z, vperp, vpa, vzeta, vr, vz) - # Re-do this in case adaptive_timestep_update re-arranged the `scratch` vector - new_scratch = scratch[istage+1] - old_scratch = scratch[istage] - - if t_params.previous_dt[] == 0.0 - # Re-update remaining velocity moments that are calculable from the evolved - # pdf These need to be re-calculated because `new_scratch` was swapped with - # the beginning of the timestep, because the timestep failed - update_derived_ion_moments_and_derivatives() - if composition.n_neutral_species > 0 - update_derived_neutral_moments_and_derivatives() - end + if composition.n_neutral_species > 0 + if pdf_bc_constraints + # Ensure there are no negative values in the pdf before applying boundary + # conditions, so that negative deviations do not mess up the integral-constraint + # corrections in the sheath boundary conditions. + force_minimum_pdf_value_neutral!(this_scratch.pdf_neutral, + num_diss_params.neutral.force_minimum_pdf_value) + + # Enforce boundary conditions in z and vpa on the distribution function. + # Must be done after Runge Kutta update so that the boundary condition applied to + # the updated pdf is consistent with the updated moments - otherwise different + # upar between 'pdf', 'scratch[istage]' and 'scratch[istage+1]' might mean a point + # that should be set to zero at the sheath boundary according to the final upar + # has a non-zero contribution from one or more of the terms. 
NB: probably need to + # do the same for the evolved moments Note, so far vr and vzeta do not need advect + # objects, so pass `nothing` for those as a placeholder + enforce_neutral_boundary_conditions!(this_scratch.pdf_neutral, this_scratch.pdf, + boundary_distributions, this_scratch.density_neutral, this_scratch.uz_neutral, + this_scratch.pz_neutral, moments, this_scratch.density, this_scratch.upar, + fields.Er, vzeta_spectral, vr_spectral, vz_spectral, neutral_r_advect, + neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr, vz, + composition, geometry, scratch_dummy, advance.r_diffusion, + advance.vz_diffusion) - # update the electrostatic potential phi - update_phi!(fields, scratch[istage+1], vperp, z, r, composition, z_spectral, - r_spectral, scratch_dummy, gyroavs) - if !(( moments.evolve_upar || moments.evolve_ppar) && - istage == length(scratch)-1) - # _block_synchronize() here because phi needs to be read on different ranks than - # it was written on, even though the loop-type does not change here. 
However, - # after the final RK stage can skip if: - # * evolving upar or ppar as synchronization will be triggered after moments - # updates at the beginning of the next RK step - _block_synchronize() + if moments.evolve_density && moments.enforce_conservation + begin_sn_r_z_region() + A = moments.neutral.constraints_A_coefficient + B = moments.neutral.constraints_B_coefficient + C = moments.neutral.constraints_C_coefficient + @loop_sn_r_z isn ir iz begin + (A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn]) = + @views hard_force_moment_constraints_neutral!( + this_scratch.pdf_neutral[:,:,:,iz,ir,isn], moments, vz) + end end end + + # update remaining velocity moments that are calculable from the evolved pdf + update_derived_moments_neutral!(this_scratch, moments, vz, vr, vzeta, z, r, + composition) + # update the thermal speed + begin_sn_r_z_region() + @loop_sn_r_z isn ir iz begin + moments.neutral.vth[iz,ir,isn] = sqrt(2.0*this_scratch.pz_neutral[iz,ir,isn]/this_scratch.density_neutral[iz,ir,isn]) + end + + # update the parallel heat flux + update_neutral_qz!(moments.neutral.qz, moments.neutral.qz_updated, + this_scratch.density_neutral, this_scratch.uz_neutral, + moments.neutral.vth, this_scratch.pdf_neutral, vz, vr, vzeta, z, + r, composition, moments.evolve_density, moments.evolve_upar, + moments.evolve_ppar) + + calculate_neutral_moment_derivatives!(moments, this_scratch, scratch_dummy, z, + z_spectral, + num_diss_params.neutral.moment_dissipation_coefficient) end end """ - adaptive_timestep_update!(scratch, t_params, rk_coefs, moments, n_neutral_species) + adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, + fields, composition, collisions, geometry, + external_source_settings, spectral_objects, + advect_objects, gyroavs, num_diss_params, advance, + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, + success, nl_max_its_fraction) Check the error estimate for the embedded RK method and adjust the timestep if appropriate. 
""" -function adaptive_timestep_update!(scratch, t, t_params, moments, fields, composition, +function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, + fields, boundary_distributions, composition, collisions, geometry, external_source_settings, - advect_objects, r, z, vperp, vpa, vzeta, vr, vz) + spectral_objects, advect_objects, gyroavs, + num_diss_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) #error_norm_method = "Linf" error_norm_method = "L2" @@ -1662,30 +1985,34 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos # reduction over the shared-memory block, so all processes must calculate the same # species at the same time. begin_r_vperp_vpa_region(; no_synchronize=true) - ion_z_CFL = Inf - @loop_s is begin - update_speed_z!(z_advect[is], moments.ion.upar, moments.ion.vth, evolve_upar, - evolve_ppar, fields, vpa, vperp, z, r, t, geometry, is) - this_minimum = get_minimum_CFL_z(z_advect[is].speed, z) - @serial_region begin - ion_z_CFL = min(ion_z_CFL, this_minimum) + if !t_params.implicit_ion_advance + ion_z_CFL = Inf + @loop_s is begin + update_speed_z!(z_advect[is], moments.ion.upar, moments.ion.vth, evolve_upar, + evolve_ppar, fields, vpa, vperp, z, r, t, geometry, is) + this_minimum = get_minimum_CFL_z(z_advect[is].speed, z) + @serial_region begin + ion_z_CFL = min(ion_z_CFL, this_minimum) + end end + push!(CFL_limits, t_params.CFL_prefactor * ion_z_CFL) end - push!(CFL_limits, t_params.CFL_prefactor * ion_z_CFL) - # ion vpa-advection - begin_r_z_vperp_region() - ion_vpa_CFL = Inf - update_speed_vpa!(vpa_advect, fields, scratch[end], moments, vpa, vperp, z, r, - composition, collisions, external_source_settings.ion, t, - geometry) - @loop_s is begin - this_minimum = get_minimum_CFL_vpa(vpa_advect[is].speed, vpa) - @serial_region begin - ion_vpa_CFL = min(ion_vpa_CFL, this_minimum) + if !(t_params.implicit_ion_advance || 
t_params.implicit_vpa_advection) + # ion vpa-advection + begin_r_z_vperp_region() + ion_vpa_CFL = Inf + update_speed_vpa!(vpa_advect, fields, scratch[end], moments, vpa, vperp, z, r, + composition, collisions, external_source_settings.ion, t, + geometry) + @loop_s is begin + this_minimum = get_minimum_CFL_vpa(vpa_advect[is].speed, vpa) + @serial_region begin + ion_vpa_CFL = min(ion_vpa_CFL, this_minimum) + end end + push!(CFL_limits, t_params.CFL_prefactor * ion_vpa_CFL) end - push!(CFL_limits, t_params.CFL_prefactor * ion_vpa_CFL) # To avoid double counting points when we use distributed-memory MPI, skip the # inner/lower point in r and z if this process is not the first block in that @@ -1693,12 +2020,56 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos skip_r_inner = r.irank != 0 skip_z_lower = z.irank != 0 - # Calculate error for ion distribution functions - # Note rk_error_variable!() stores the calculated error in `scratch[2]`. - rk_error_variable!(scratch, :pdf, t_params) - ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[end].pdf, t_params.rtol, - t_params.atol; method=error_norm_method, - skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, + # Calculate low-order approximations, from which the timestep error can be estimated. + # Note we store the calculated low-order approxmation in `scratch[2]`. 
+ rk_loworder_solution!(scratch, scratch_implicit, :pdf, t_params) + if moments.evolve_density + begin_s_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :density, t_params) + end + if moments.evolve_upar + begin_s_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :upar, t_params) + end + if moments.evolve_ppar + begin_s_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :ppar, t_params) + end + if n_neutral_species > 0 + begin_sn_r_z_vzeta_vr_region() + rk_loworder_solution!(scratch, scratch_implicit, :pdf_neutral, t_params; neutrals=true) + if moments.evolve_density + begin_sn_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :density_neutral, t_params; neutrals=true) + end + if moments.evolve_upar + begin_sn_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :uz_neutral, t_params; neutrals=true) + end + if moments.evolve_ppar + begin_sn_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :pz_neutral, t_params; neutrals=true) + end + end + + # Apply boundary conditions and constraints + apply_all_bcs_constraints_update_moments!( + scratch[2], moments, fields, boundary_distributions, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, false) + + # Re-calculate moment derivatives in the `moments` struct, in case they were changed + # by the previous call + apply_all_bcs_constraints_update_moments!( + scratch[t_params.n_rk_stages+1], moments, fields, boundary_distributions, vz, vr, + vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, false; pdf_bc_constraints=false) + + # Calculate the timstep error estimates + ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[t_params.n_rk_stages+1].pdf, + t_params.rtol, t_params.atol; + method=error_norm_method, skip_r_inner=skip_r_inner, + skip_z_lower=skip_z_lower, 
error_sum_zero=t_params.error_sum_zero) push!(error_norms, ion_pdf_error) push!(total_points, @@ -1707,8 +2078,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos # Calculate error for ion moments, if necessary if moments.evolve_density begin_s_r_z_region() - rk_error_variable!(scratch, :density, t_params) - ion_n_err = local_error_norm(scratch[2].density, scratch[end].density, + ion_n_err = local_error_norm(scratch[2].density, + scratch[t_params.n_rk_stages+1].density, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, @@ -1718,8 +2089,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_upar begin_s_r_z_region() - rk_error_variable!(scratch, :upar, t_params) - ion_u_err = local_error_norm(scratch[2].upar, scratch[end].upar, t_params.rtol, + ion_u_err = local_error_norm(scratch[2].upar, + scratch[t_params.n_rk_stages+1].upar, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, error_sum_zero=t_params.error_sum_zero) @@ -1728,8 +2099,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_ppar begin_s_r_z_region() - rk_error_variable!(scratch, :ppar, t_params) - ion_p_err = local_error_norm(scratch[2].ppar, scratch[end].ppar, t_params.rtol, + ion_p_err = local_error_norm(scratch[2].ppar, + scratch[t_params.n_rk_stages+1].ppar, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, error_sum_zero=t_params.error_sum_zero) @@ -1770,7 +2141,6 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos push!(CFL_limits, t_params.CFL_prefactor * neutral_vz_CFL) # Calculate error for neutral distribution functions - rk_error_variable!(scratch, :pdf_neutral, t_params; neutrals=true) neut_pdf_error = local_error_norm(scratch[2].pdf_neutral, 
scratch[end].pdf_neutral, t_params.rtol, t_params.atol; method=error_norm_method, @@ -1785,7 +2155,6 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos # Calculate error for neutral moments, if necessary if moments.evolve_density begin_sn_r_z_region() - rk_error_variable!(scratch, :density_neutral, t_params; neutrals=true) neut_n_err = local_error_norm(scratch[2].density_neutral, scratch[end].density_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, @@ -1797,8 +2166,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_upar begin_sn_r_z_region() - rk_error_variable!(scratch, :uz_neutral, t_params; neutrals=true) - neut_u_err = local_error_norm(scratch[2].uz_neutral, scratch[end].uz_neutral, + neut_u_err = local_error_norm(scratch[2].uz_neutral, + scratch[t_params.n_rk_stages+1].uz_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, skip_r_inner=skip_r_inner, @@ -1809,8 +2178,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_ppar begin_sn_r_z_region() - rk_error_variable!(scratch, :pz_neutral, t_params; neutrals=true) - neut_p_err = local_error_norm(scratch[2].pz_neutral, scratch[end].pz_neutral, + neut_p_err = local_error_norm(scratch[2].pz_neutral, + scratch[t_params.n_rk_stages+1].pz_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, skip_r_inner=skip_r_inner, @@ -1822,100 +2191,31 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms, - total_points, current_dt, error_norm_method) - - return nothing -end + total_points, current_dt, error_norm_method, + success, nl_max_its_fraction) -""" -update velocity moments that are calculable from the evolved ion pdf -""" -function update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, - 
r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) - - if composition.gyrokinetic_ions - ff = scratch_dummy.buffer_vpavperpzrs_1 - # fill buffer with ring-averaged F (gyroaverage at fixed position) - gyroaverage_pdf!(ff,new_scratch.pdf,gyroavs,vpa,vperp,z,r,composition) - else - ff = new_scratch.pdf + if t_params.previous_dt[] == 0.0 + # Re-update remaining velocity moments that are calculable from the evolved + # pdf These need to be re-calculated because `scratch[istage+1]` is now the + # state at the beginning of the timestep, because the timestep failed + apply_all_bcs_constraints_update_moments!( + scratch[t_params.n_rk_stages+1], moments, fields, nothing, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, false; + pdf_bc_constraints=false) end - - if !moments.evolve_density - update_density!(new_scratch.density, moments.ion.dens_updated, - ff, vpa, vperp, z, r, composition) - end - if !moments.evolve_upar - update_upar!(new_scratch.upar, moments.ion.upar_updated, new_scratch.density, - new_scratch.ppar, ff, vpa, vperp, z, r, composition, - moments.evolve_density, moments.evolve_ppar) - end - if !moments.evolve_ppar - # update_ppar! 
calculates (p_parallel/m_s N_e c_s^2) + (n_s/N_e)*(upar_s/c_s)^2 = (1/√π)∫d(vpa/c_s) (vpa/c_s)^2 * (√π f_s c_s / N_e) - update_ppar!(new_scratch.ppar, moments.ion.ppar_updated, new_scratch.density, - new_scratch.upar, ff, vpa, vperp, z, r, composition, - moments.evolve_density, moments.evolve_upar) - end - update_pperp!(new_scratch.pperp, ff, vpa, vperp, z, r, composition) - - # if diagnostic time step/RK stage - # update the diagnostic chodura condition - if diagnostic_moments - update_chodura!(moments,ff,vpa,vperp,z,r,r_spectral,composition,geometry,scratch_dummy,z_advect) - end - # update the thermal speed - begin_s_r_z_region() - try #below block causes DomainError if ppar < 0 or density, so exit cleanly if possible - update_vth!(moments.ion.vth, new_scratch.ppar, new_scratch.pperp, new_scratch.density, vperp, z, r, composition) - catch e - if global_size[] > 1 - println("ERROR: error calculating vth in time_advance.jl") - println(e) - display(stacktrace(catch_backtrace())) - flush(stdout) - flush(stderr) - MPI.Abort(comm_world, 1) - end - rethrow(e) - end - # update the parallel heat flux - update_qpar!(moments.ion.qpar, moments.ion.qpar_updated, new_scratch.density, - new_scratch.upar, moments.ion.vth, ff, vpa, vperp, z, r, - composition, moments.evolve_density, moments.evolve_upar, - moments.evolve_ppar) - # add further moments to be computed here - -end -""" -update velocity moments that are calculable from the evolved neutral pdf -""" -function update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r, - composition) - if !moments.evolve_density - update_neutral_density!(new_scratch.density_neutral, moments.neutral.dens_updated, - new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition) - end - if !moments.evolve_upar - update_neutral_uz!(new_scratch.uz_neutral, moments.neutral.uz_updated, - new_scratch.density_neutral, new_scratch.pz_neutral, - new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, - moments.evolve_density, 
moments.evolve_ppar) - end - if !moments.evolve_ppar - update_neutral_pz!(new_scratch.pz_neutral, moments.neutral.pz_updated, - new_scratch.density_neutral, new_scratch.uz_neutral, - new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, - moments.evolve_density, moments.evolve_upar) - end + return nothing end """ """ -function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, composition, collisions, - geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, - advance, fp_arrays, scratch_dummy, manufactured_source_list,diagnostic_checks, istep) +function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, moments, fields, spectral_objects, advect_objects, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, + manufactured_source_list,diagnostic_checks, istep) begin_s_r_z_region() @@ -1950,25 +2250,116 @@ function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase _block_synchronize() end + # success is set to false if an iteration failed to converge in an implicit solve + success = true for istage ∈ 1:n_rk_stages - # do an Euler time advance, with scratch[2] containing the advanced quantities - # and scratch[1] containing quantities at time level n - update_solution_vector!(scratch, moments, istage, composition, vpa, vperp, z, r) + if t_params.rk_coefs_implicit !== nothing + update_solution_vector!(scratch_implicit[istage], scratch[istage], moments, + composition, vpa, vperp, z, r) + if t_params.implicit_coefficient_is_zero[istage] + # No implicit solve needed at this stage. Do an explicit step of the + # implicitly-evolved terms so we can store their time-derivative at this + # stage. 
+ euler_time_advance!(scratch_implicit[istage], scratch[istage], + pdf, fields, moments, advect_objects, vz, vr, vzeta, + vpa, vperp, gyrophase, z, r, t, t_params.dt[], + spectral_objects, composition, collisions, geometry, + scratch_dummy, manufactured_source_list, + external_source_settings, num_diss_params, + advance_implicit, fp_arrays, istage) + # The result of the forward-Euler step is just a hack to store the + # (explicit) time-derivative of the implicitly advanced terms. The result + # is not used as input to the explicit part of the IMEX advance. + old_scratch = scratch[istage] + else + # Backward-Euler step for implicitly-evolved terms. + # Note the timestep for this solve is rk_coefs_implict[istage,istage]*dt. + # The diagonal elements are equal to the Butcher 'a' coefficients + # rk_coefs_implicit[istage,istage]=a[istage,istage]. + success = backward_euler!(scratch_implicit[istage], scratch[istage], pdf, + fields, moments, advect_objects, vz, vr, vzeta, + vpa, vperp, gyrophase, z, r, t, t_params.dt[] * + t_params.rk_coefs_implicit[istage,istage], + spectral_objects, composition, collisions, + geometry, scratch_dummy, + manufactured_source_list, + external_source_settings, num_diss_params, + gyroavs, nl_solver_params, advance_implicit, + fp_arrays, istage) + success = MPI.Allreduce(success, &, comm_world) + if !success + # Break out of the istage loop, as passing `success = false` to the + # adaptive timestep update function will signal a failed timestep, so + # that we restart this timestep with a smaller `dt`. + break + end + # The result of the implicit solve gives the state vector at 'istage' + # which is used as input to the explicit part of the IMEX time step. 
+ old_scratch = scratch_implicit[istage] + apply_all_bcs_constraints_update_moments!( + scratch_implicit[istage], moments, fields, boundary_distributions, vz, + vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, + composition, geometry, gyroavs, num_diss_params, advance, + scratch_dummy, false) + end + else + # Fully explicit method starts the forward-Euler step with the result from the + # previous stage. + old_scratch = scratch[istage] + end + update_solution_vector!(scratch[istage+1], old_scratch, moments, composition, vpa, + vperp, z, r) + # do an Euler time advance, with scratch[istage+1] containing the advanced + # quantities and scratch[istage] containing quantities at time level n, RK stage + # istage # calculate f^{(1)} = fⁿ + Δt*G[fⁿ] = scratch[2].pdf - euler_time_advance!(scratch[istage+1], scratch[istage], - pdf, fields, moments, - advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, - t_params.dt[], spectral_objects, composition, - collisions, geometry, scratch_dummy, manufactured_source_list, - external_source_settings, num_diss_params, advance, fp_arrays, istage) + euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments, + advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, + r, t, t_params.dt[], spectral_objects, composition, + collisions, geometry, scratch_dummy, + manufactured_source_list, external_source_settings, + num_diss_params, advance, fp_arrays, istage) + + rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) + + # Always apply boundary conditions and constraints here for explicit schemes. For + # IMEX schemes, only apply boundary conditions and constraints at the final RK + # stage - for other stages they are imposed after the implicit part of the step. + # If `implicit_coefficient_is_zero` is true for the next stage, then this step is + # explicit, so we need the bcs and constraints. 
+ apply_bc_constraints = (t_params.rk_coefs_implicit === nothing + || istage == n_rk_stages + || t_params.implicit_coefficient_is_zero[istage+1]) diagnostic_moments = diagnostic_checks && istage == n_rk_stages - @views rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr, - vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, - t, t_params, istage, composition, collisions, geometry, - external_source_settings, gyroavs, num_diss_params, advance, - scratch_dummy, diagnostic_moments, istep) + apply_all_bcs_constraints_update_moments!( + scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments; + pdf_bc_constraints=apply_bc_constraints) + end + + if t_params.adaptive + nl_max_its_fraction = 0.0 + for p ∈ nl_solver_params + if p !== nothing + nl_max_its_fraction = + max(p.max_nonlinear_iterations_this_step[] / p.nonlinear_max_iterations, + nl_max_its_fraction) + end + end + adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, fields, + boundary_distributions, composition, collisions, + geometry, external_source_settings, spectral_objects, + advect_objects, gyroavs, num_diss_params, advance, + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success, + nl_max_its_fraction) + elseif !success + error("Implicit part of timestep failed") end + reset_nonlinear_per_stage_counters(nl_solver_params.ion_advance) + reset_nonlinear_per_stage_counters(nl_solver_params.vpa_advection) + istage = n_rk_stages+1 # update the pdf.norm and moments arrays as needed @@ -2126,31 +2517,51 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, source_terms_manufactured!(fvec_out.pdf, fvec_out.pdf_neutral, vz, vr, vzeta, vpa, vperp, z, r, t, dt, composition, manufactured_source_list) end - if advance.cx_collisions || advance.ionization_collisions + if 
advance.ion_cx_collisions || advance.ion_ionization_collisions # gyroaverage neutral dfn and place it in the ion.buffer array for use in the collisions step vzvrvzeta_to_vpavperp!(pdf.ion.buffer, fvec_in.pdf_neutral, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, geometry, composition) + end + if advance.neutral_cx_collisions || advance.neutral_ionization_collisions # interpolate ion particle dfn and place it in the neutral.buffer array for use in the collisions step vpavperp_to_vzvrvzeta!(pdf.neutral.buffer, fvec_in.pdf, vz, vr, vzeta, vpa, vperp, z, r, geometry, composition) end # account for charge exchange collisions between ions and neutrals - if advance.cx_collisions_1V - charge_exchange_collisions_1V!(fvec_out.pdf, fvec_out.pdf_neutral, fvec_in, - moments, composition, vpa, vz, - collisions.charge_exchange, vpa_spectral, - vz_spectral, dt) - elseif advance.cx_collisions - charge_exchange_collisions_3V!(fvec_out.pdf, fvec_out.pdf_neutral, pdf.ion.buffer, pdf.neutral.buffer, fvec_in, composition, - vz, vr, vzeta, vpa, vperp, z, r, collisions.charge_exchange, dt) + if advance.ion_cx_collisions_1V + ion_charge_exchange_collisions_1V!(fvec_out.pdf, fvec_in, moments, composition, + vpa, vz, collisions.charge_exchange, + vpa_spectral, vz_spectral, dt) + elseif advance.ion_cx_collisions + ion_charge_exchange_collisions_3V!(fvec_out.pdf, pdf.ion.buffer, fvec_in, + composition, vz, vr, vzeta, vpa, vperp, z, r, + collisions.charge_exchange, dt) + end + if advance.neutral_cx_collisions_1V + neutral_charge_exchange_collisions_1V!(fvec_out.pdf_neutral, fvec_in, moments, + composition, vpa, vz, + collisions.charge_exchange, vpa_spectral, + vz_spectral, dt) + elseif advance.neutral_cx_collisions + neutral_charge_exchange_collisions_3V!(fvec_out.pdf_neutral, pdf.neutral.buffer, + fvec_in, composition, vz, vr, vzeta, vpa, + vperp, z, r, collisions.charge_exchange, + dt) end # account for ionization collisions between ions and neutrals - if advance.ionization_collisions_1V - 
ionization_collisions_1V!(fvec_out.pdf, fvec_out.pdf_neutral, fvec_in, vz, vpa, - vperp, z, r, vz_spectral, moments, composition, - collisions, dt) - elseif advance.ionization_collisions - ionization_collisions_3V!(fvec_out.pdf, fvec_out.pdf_neutral, pdf.ion.buffer, fvec_in, composition, - vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + if advance.ion_ionization_collisions_1V + ion_ionization_collisions_1V!(fvec_out.pdf, fvec_in, vz, vpa, vperp, z, r, + vz_spectral, moments, composition, collisions, dt) + elseif advance.ion_ionization_collisions + ion_ionization_collisions_3V!(fvec_out.pdf, pdf.ion.buffer, fvec_in, composition, + vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + end + if advance.neutral_ionization_collisions_1V + neutral_ionization_collisions_1V!(fvec_out.pdf_neutral, fvec_in, vz, vpa, vperp, + z, r, vz_spectral, moments, composition, + collisions, dt) + elseif advance.neutral_ionization_collisions + neutral_ionization_collisions_3V!(fvec_out.pdf_neutral, fvec_in, composition, vz, + vr, vzeta, vpa, vperp, z, r, collisions, dt) end if advance.ionization_source constant_ionization_source!(fvec_out.pdf, fvec_in, vpa, vperp, z, r, moments, @@ -2173,7 +2584,7 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, end # add numerical dissipation - if advance.numerical_dissipation + if advance.ion_numerical_dissipation vpa_dissipation!(fvec_out.pdf, fvec_in.pdf, vpa, vpa_spectral, dt, num_diss_params.ion.vpa_dissipation_coefficient) vperp_dissipation!(fvec_out.pdf, fvec_in.pdf, vperp, vperp_spectral, dt, @@ -2182,6 +2593,8 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, num_diss_params.ion.z_dissipation_coefficient, scratch_dummy) r_dissipation!(fvec_out.pdf, fvec_in.pdf, r, r_spectral, dt, num_diss_params.ion.r_dissipation_coefficient, scratch_dummy) + end + if advance.neutral_numerical_dissipation vz_dissipation_neutral!(fvec_out.pdf_neutral, fvec_in.pdf_neutral, vz, vz_spectral, dt, 
num_diss_params.neutral.vz_dissipation_coefficient) z_dissipation_neutral!(fvec_out.pdf_neutral, fvec_in.pdf_neutral, z, z_spectral, @@ -2261,13 +2674,306 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, return nothing end +function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, vz, vr, + vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects, + composition, collisions, geometry, scratch_dummy, + manufactured_source_list, external_source_settings, + num_diss_params, gyroavs, nl_solver_params, advance, fp_arrays, + istage) + + vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral + vz_spectral, vr_spectral, vzeta_spectral = spectral_objects.vz_spectral, spectral_objects.vr_spectral, spectral_objects.vzeta_spectral + vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect + neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect + + if nl_solver_params.ion_advance !== nothing + success = implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, + advect_objects, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, t, dt, spectral_objects, + composition, collisions, geometry, scratch_dummy, + manufactured_source_list, + external_source_settings, num_diss_params, + gyroavs, nl_solver_params.ion_advance, advance, + fp_arrays, istage) + if !success + return success + end + elseif advance.vpa_advection + success = implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, + z_advect, vpa_advect, vpa, vperp, z, r, dt, t, + r_spectral, z_spectral, vpa_spectral, + composition, collisions, + external_source_settings.ion, geometry, + nl_solver_params.vpa_advection, + advance.vpa_diffusion, num_diss_params, 
gyroavs, + scratch_dummy) + if !success + return success + end + end + + return true +end + +""" + implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, + vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt, + spectral_objects, composition, collisions, geometry, + scratch_dummy, manufactured_source_list, + external_source_settings, num_diss_params, + nl_solver_params, advance, fp_arrays, istage) + +Do a backward-Euler timestep for all terms in the ion kinetic equation. +""" +function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, + vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt, + spectral_objects, composition, collisions, geometry, + scratch_dummy, manufactured_source_list, + external_source_settings, num_diss_params, gyroavs, + nl_solver_params, advance, fp_arrays, istage) + + vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral + vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect + + # Make a copy of fvec_in.pdf so we can apply boundary conditions at the 'new' + # timestep, as these are the boundary conditions we need to apply the residual. 
+ f_old = scratch_dummy.implicit_buffer_vpavperpzrs_1 + begin_s_r_z_vperp_vpa_region() + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + f_old[ivpa,ivperp,iz,ir,is] = fvec_in.pdf[ivpa,ivperp,iz,ir,is] + end + + coords = (s=composition.n_ion_species, r=r, z=z, vperp=vperp, vpa=vpa) + icut_lower_z = scratch_dummy.int_buffer_rs_1 + icut_upper_z = scratch_dummy.int_buffer_rs_2 + zero = 1.0e-14 + + rtol = nl_solver_params.rtol + atol = nl_solver_params.atol + + begin_s_r_z_region() + @loop_s_r_z is ir iz begin + @views hard_force_moment_constraints!(f_old[:,:,iz,ir,is], moments, vpa) + end + + begin_s_r_region() + @loop_s_r is ir begin + if z.irank == 0 + iz = 1 + @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + icut_lower_z[ir,is] = vpa.n + for ivpa ∈ vpa.n:-1:1 + # for left boundary in zed (z = -Lz/2), want + # f(z=-Lz/2, v_parallel > 0) = 0 + if vpa.scratch[ivpa] ≤ zero + icut_lower_z[ir,is] = ivpa + 1 + break + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + @. 
vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + icut_upper_z[ir,is] = 0 + for ivpa ∈ 1:vpa.n + # for right boundary in zed (z = Lz/2), want + # f(z=Lz/2, v_parallel < 0) = 0 + if vpa.scratch[ivpa] ≥ -zero + icut_upper_z[ir,is] = ivpa - 1 + break + end + end + end + end + + if vpa.n > 1 + # calculate the vpa advection speed, to ensure it is correct when used to apply the + # boundary condition + update_speed_vpa!(vpa_advect, fields, fvec_in, moments, vpa, vperp, z, r, composition, + collisions, external_source_settings.ion, t, geometry) + end + if z.n > 1 + @loop_s is begin + # get the updated speed along the z direction using the current f + @views update_speed_z!(z_advect[is], fvec_in.upar[:,:,is], + moments.ion.vth[:,:,is], moments.evolve_upar, + moments.evolve_ppar, fields, vpa, vperp, z, r, t, + geometry, is) + end + end + if r.n > 1 + @loop_s is begin + # get the updated speed along the r direction using the current f + @views update_speed_r!(r_advect[is], fvec_in.upar[:,:,is], + moments.ion.vth[:,:,is], fields, moments.evolve_upar, + moments.evolve_ppar, vpa, vperp, z, r, geometry, is) + end + end + if vperp.n > 1 + # calculate the vperp advection speed, to ensure it is correct when used to apply the + # boundary condition + begin_s_r_z_vpa_region() + @loop_s is begin + # get the updated speed along the vperp direction using the current f + @views update_speed_vperp!(vperp_advect[is], vpa, vperp, z, r, z_advect[is], + r_advect[is], geometry) + end + end + + function apply_bc!(x) + if vpa.n > 1 + begin_s_r_z_vperp_region() + @loop_s_r_z_vperp is ir iz ivperp begin + @views enforce_v_boundary_condition_local!(x[:,ivperp,iz,ir,is], vpa.bc, + vpa_advect[is].speed[:,ivperp,iz,ir], + advance.vpa_diffusion, vpa, + vpa_spectral) + end + end + if vperp.n > 1 + begin_s_r_z_vpa_region() + enforce_vperp_boundary_condition!(x, vperp.bc, vperp, vperp_spectral, + vperp_advect,
advance.vperp_diffusion) + end + + if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) + # Wall boundary conditions. Note that as density, upar, ppar do not + # change in this implicit step, f_new, f_old, and residual should all + # be zero at exactly the same set of grid points, so it is reasonable + # to zero-out `residual` to impose the boundary condition. We impose + # this after subtracting f_old in case rounding errors, etc. mean that + # at some point f_old had a different boundary condition cut-off + # index. + begin_s_r_vperp_region() + if z.irank == 0 + iz = 1 + @loop_s_r_vperp is ir ivperp begin + x[icut_lower_z[ir,is]:end,ivperp,iz,ir,is] .= 0.0 + end + end + if z.irank == z.nrank - 1 + iz = z.n + @loop_s_r_vperp is ir ivperp begin + x[1:icut_upper_z[ir,is],ivperp,iz,ir,is] .= 0.0 + end + end + end + + return nothing + end + + # Use a forward-Euler step as the initial guess for fvec_out.pdf + euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, vz, vr, + vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects, + composition, collisions, geometry, scratch_dummy, + manufactured_source_list, external_source_settings, + num_diss_params, advance, fp_arrays, istage) + + # Apply the 'new' boundary conditions to f_old, so it has the same boundary conditions + # as we will apply to the residual, so that f_new obeys the 'new' boundary conditions. + apply_bc!(f_old) + # Also apply the bc to the forward-Euler updated values which are the initial state + # for 'f_new'.
+ apply_bc!(fvec_out.pdf) + hard_force_moment_constraints!(fvec_out.pdf, moments, vpa) + + # Define a function whose input is `f_new`, so that when it's output + # `residual` is zero, f_new is the result of a backward-Euler timestep: + # (f_new - f_old) / dt = RHS(f_new) + # ⇒ f_new - f_old - dt*RHS(f_new) = 0 + function residual_func!(residual, f_new) + begin_s_r_z_vperp_vpa_region() + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + residual[ivpa,ivperp,iz,ir,is] = f_old[ivpa,ivperp,iz,ir,is] + end + + # scratch_pdf struct containing the array passed as f_new + new_scratch = scratch_pdf(f_new, fvec_out.density, fvec_out.upar, fvec_out.ppar, + fvec_out.pperp, fvec_out.temp_z_s, fvec_out.pdf_neutral, + fvec_out.density_neutral, fvec_out.uz_neutral, + fvec_out.pz_neutral) + # scratch_pdf struct containing the array passed as residual + residual_scratch = scratch_pdf(residual, fvec_out.density, fvec_out.upar, + fvec_out.ppar, fvec_out.pperp, fvec_out.temp_z_s, + fvec_out.pdf_neutral, fvec_out.density_neutral, + fvec_out.uz_neutral, fvec_out.pz_neutral) + + # Ensure moments are consistent with f_new + update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, + false) + calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z, + z_spectral, + num_diss_params.ion.moment_dissipation_coefficient) + + euler_time_advance!(residual_scratch, new_scratch, pdf, fields, moments, + advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, + r, t, dt, spectral_objects, composition, collisions, geometry, + scratch_dummy, manufactured_source_list, + external_source_settings, num_diss_params, advance, fp_arrays, + istage) + + # Make sure updated f will not contain negative values + #@. 
residual = max(residual, minval) + + # Now + # residual = f_old + dt*RHS(f_new) + # so update to desired residual + begin_s_r_z_vperp_vpa_region() + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + residual[ivpa,ivperp,iz,ir,is] = f_new[ivpa,ivperp,iz,ir,is] - residual[ivpa,ivperp,iz,ir,is] + end + + apply_bc!(residual) + + begin_s_r_z_region() + @loop_s_r_z is ir iz begin + @views moment_constraints_on_residual!(residual[:,:,iz,ir,is], + f_new[:,:,iz,ir,is], moments, vpa) + end + + return nothing + end + + # No preconditioning for now + left_preconditioner = identity + right_preconditioner = identity + + # Buffers + # Note vpa,scratch is used by advance_f!, so we cannot use it here. + residual = scratch_dummy.implicit_buffer_vpavperpzrs_2 + delta_x = scratch_dummy.implicit_buffer_vpavperpzrs_3 + rhs_delta = scratch_dummy.implicit_buffer_vpavperpzrs_4 + v = scratch_dummy.implicit_buffer_vpavperpzrs_5 + w = scratch_dummy.implicit_buffer_vpavperpzrs_6 + + # Using the forward-Euler step seems (in at least one case) to slightly + # increase the number of iterations, so skip this. 
+ ## Use forward-Euler step for initial guess + #residual_func!(residual, this_f_out) + #this_f_out .+= residual + + success = newton_solve!(fvec_out.pdf, residual_func!, residual, delta_x, + rhs_delta, v, w, nl_solver_params, coords=coords, + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner) + + nl_solver_params.stage_counter[] += 1 + + return success +end + """ update the vector containing the pdf and any evolved moments of the pdf for use in the Runge-Kutta time advance """ -function update_solution_vector!(evolved, moments, istage, composition, vpa, vperp, z, r) - new_evolved = evolved[istage+1] - old_evolved = evolved[istage] +function update_solution_vector!(new_evolved, old_evolved, moments, composition, vpa, vperp, z, r) begin_s_r_z_region() @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin new_evolved.pdf[ivpa,ivperp,iz,ir,is] = old_evolved.pdf[ivpa,ivperp,iz,ir,is] diff --git a/moment_kinetics/src/velocity_moments.jl b/moment_kinetics/src/velocity_moments.jl index 1316080aa..5b211e437 100644 --- a/moment_kinetics/src/velocity_moments.jl +++ b/moment_kinetics/src/velocity_moments.jl @@ -1440,6 +1440,90 @@ function calculate_neutral_moment_derivatives!(moments, scratch, scratch_dummy, end end +""" +update velocity moments that are calculable from the evolved ion pdf +""" +function update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) + + if composition.gyrokinetic_ions + ff = scratch_dummy.buffer_vpavperpzrs_1 + # fill buffer with ring-averaged F (gyroaverage at fixed position) + gyroaverage_pdf!(ff,new_scratch.pdf,gyroavs,vpa,vperp,z,r,composition) + else + ff = new_scratch.pdf + end + + if !moments.evolve_density + update_density!(new_scratch.density, moments.ion.dens_updated, + ff, vpa, vperp, z, r, composition) + end + if !moments.evolve_upar + update_upar!(new_scratch.upar, moments.ion.upar_updated, 
new_scratch.density, + new_scratch.ppar, ff, vpa, vperp, z, r, composition, + moments.evolve_density, moments.evolve_ppar) + end + if !moments.evolve_ppar + # update_ppar! calculates (p_parallel/m_s N_e c_s^2) + (n_s/N_e)*(upar_s/c_s)^2 = (1/√π)∫d(vpa/c_s) (vpa/c_s)^2 * (√π f_s c_s / N_e) + update_ppar!(new_scratch.ppar, moments.ion.ppar_updated, new_scratch.density, + new_scratch.upar, ff, vpa, vperp, z, r, composition, + moments.evolve_density, moments.evolve_upar) + end + update_pperp!(new_scratch.pperp, ff, vpa, vperp, z, r, composition) + + # if diagnostic time step/RK stage + # update the diagnostic chodura condition + if diagnostic_moments + update_chodura!(moments,ff,vpa,vperp,z,r,r_spectral,composition,geometry,scratch_dummy,z_advect) + end + # update the thermal speed + begin_s_r_z_region() + try #below block causes DomainError if ppar < 0 or density, so exit cleanly if possible + update_vth!(moments.ion.vth, new_scratch.ppar, new_scratch.pperp, new_scratch.density, vperp, z, r, composition) + catch e + if global_size[] > 1 + println("ERROR: error calculating vth in time_advance.jl") + println(e) + display(stacktrace(catch_backtrace())) + flush(stdout) + flush(stderr) + MPI.Abort(comm_world, 1) + end + rethrow(e) + end + # update the parallel heat flux + update_qpar!(moments.ion.qpar, moments.ion.qpar_updated, new_scratch.density, + new_scratch.upar, moments.ion.vth, ff, vpa, vperp, z, r, + composition, moments.evolve_density, moments.evolve_upar, + moments.evolve_ppar) + # add further moments to be computed here + +end + +""" +update velocity moments that are calculable from the evolved neutral pdf +""" +function update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r, + composition) + + if !moments.evolve_density + update_neutral_density!(new_scratch.density_neutral, moments.neutral.dens_updated, + new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition) + end + if !moments.evolve_upar + update_neutral_uz!(new_scratch.uz_neutral, 
moments.neutral.uz_updated, + new_scratch.density_neutral, new_scratch.pz_neutral, + new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, + moments.evolve_density, moments.evolve_ppar) + end + if !moments.evolve_ppar + update_neutral_pz!(new_scratch.pz_neutral, moments.neutral.pz_updated, + new_scratch.density_neutral, new_scratch.uz_neutral, + new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, + moments.evolve_density, moments.evolve_upar) + end +end + """ computes the integral over vpa of the integrand, using the input vpa_wgts """ diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 9d4881b0b..bd70b3503 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -6,8 +6,20 @@ export vpa_advection! export update_speed_vpa! using ..advection: advance_f_local! +using ..boundary_conditions: enforce_v_boundary_condition_local! using ..communication using ..looping +using ..moment_constraints: hard_force_moment_constraints!, + moment_constraints_on_residual! +using ..moment_kinetics_structs: scratch_pdf, weak_discretization_info +using ..nonlinear_solvers: newton_solve! +using ..velocity_moments: update_derived_moments!, calculate_ion_moment_derivatives! + +using ..array_allocation: allocate_float +using ..boundary_conditions: vpagrid_to_dzdt +using ..calculus: second_derivative! 
+using LinearAlgebra +using SparseArrays """ """ @@ -30,6 +42,278 @@ function vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, vperp, z, end end +""" +""" +function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_advect, + vpa, vperp, z, r, dt, t, r_spectral, z_spectral, + vpa_spectral, composition, collisions, + ion_source_settings, geometry, nl_solver_params, + vpa_diffusion, num_diss_params, gyroavs, scratch_dummy) + if vperp.n > 1 && (moments.evolve_density || moments.evolve_upar || moments.evolve_ppar) + error("Moment constraints in implicit_vpa_advection!() do not support 2V runs yet") + end + + # calculate the advection speed corresponding to current f + update_speed_vpa!(vpa_advect, fields, fvec_in, moments, vpa, vperp, z, r, composition, + collisions, ion_source_settings, t, geometry) + + # Ensure moments are consistent with f_new + new_scratch = scratch_pdf(f_out, fvec_in.density, fvec_in.upar, fvec_in.ppar, + fvec_in.pperp, fvec_in.temp_z_s, fvec_in.pdf_neutral, + fvec_in.density_neutral, fvec_in.uz_neutral, + fvec_in.pz_neutral) + update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, false) + calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z, + z_spectral, + num_diss_params.ion.moment_dissipation_coefficient) + + begin_s_r_z_vperp_region() + + coords = (vpa=vpa,) + vpa_bc = vpa.bc + minval = num_diss_params.ion.force_minimum_pdf_value + vpa_dissipation_coefficient = num_diss_params.ion.vpa_dissipation_coefficient + zero = 1.0e-14 + @loop_s is begin + @loop_r_z_vperp ir iz ivperp begin + f_old_no_bc = @view fvec_in.pdf[:,ivperp,iz,ir,is] + this_f_out = @view f_out[:,ivperp,iz,ir,is] + speed = @view vpa_advect[is].speed[:,ivperp,iz,ir] + + if z.irank == 0 && iz == 1 + @. 
vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + icut_lower_z = vpa.n + for ivpa ∈ vpa.n:-1:1 + # for left boundary in zed (z = -Lz/2), want + # f(z=-Lz/2, v_parallel > 0) = 0 + if vpa.scratch[ivpa] ≤ zero + icut_lower_z = ivpa + 1 + break + end + end + end + if z.irank == z.nrank - 1 && iz == z.n + @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + icut_upper_z = 0 + for ivpa ∈ 1:vpa.n + # for right boundary in zed (z = Lz/2), want + # f(z=Lz/2, v_parallel < 0) = 0 + if vpa.scratch[ivpa] ≥ -zero + icut_upper_z = ivpa - 1 + break + end + end + end + + function apply_bc!(x) + # Boundary condition + enforce_v_boundary_condition_local!(x, vpa_bc, speed, vpa_diffusion, + vpa, vpa_spectral) + + if z.bc == "wall" + # Wall boundary conditions. Note that as density, upar, ppar do not + # change in this implicit step, f_new, f_old, and residual should all + # be zero at exactly the same set of grid points, so it is reasonable + # to zero-out `residual` to impose the boundary condition. We impose + # this after subtracting f_old in case rounding errors, etc. mean that + # at some point f_old had a different boundary condition cut-off + # index. + if z.irank == 0 && iz == 1 + x[icut_lower_z:end] .= 0.0 + end + # absolute velocity at right boundary + if z.irank == z.nrank - 1 && iz == z.n + x[1:icut_upper_z] .= 0.0 + end + end + end + + # Need to apply 'new' boundary conditions to `f_old`, so that by imposing them + # on `residual`, they are automatically imposed on `f_new`. 
+ f_old = vpa.scratch7 .= f_old_no_bc + apply_bc!(f_old) + + #if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + # advection_matrix = allocate_float(vpa.n, vpa.n) + # advection_matrix .= 0.0 + # for i ∈ 1:vpa.nelement_local + # imin = vpa.imin[i] - (i != 1) + # imax = vpa.imax[i] + # if i == 1 + # advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + # else + # if speed[imin] < 0.0 + # advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + # elseif speed[imin] > 0.0 + # # Do nothing + # else + # advection_matrix[imin,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + # end + # end + # advection_matrix[imin+1:imax-1,imin:imax] .+= vpa_spectral.lobatto.Dmat[2:end-1,:] ./ vpa.element_scale[i] + # if i == vpa.nelement_local + # advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + # else + # if speed[imax] < 0.0 + # # Do nothing + # elseif speed[imax] > 0.0 + # advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + # else + # advection_matrix[imax,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + # end + # end + # end + # # Multiply by advection speed + # for i ∈ 1:vpa.n + # advection_matrix[i,:] .*= dt * speed[i] + # end + # for i ∈ 1:vpa.n + # advection_matrix[i,i] += 1.0 + # end + + # if isa(vpa_spectral, weak_discretization_info) + # # This allocates a new matrix - to avoid this would need to pre-allocate a + # # suitable buffer somewhere and use `mul!()`. + # advection_matrix = vpa_spectral.mass_matrix * advection_matrix + # @. advection_matrix -= dt * vpa_dissipation_coefficient * vpa_spectral.K_matrix + # elseif vpa_dissipation_coefficient > 0.0 + # error("Non-weak-form schemes cannot precondition diffusion") + # end + + # # hacky (?) 
Dirichlet boundary conditions + # this_f_out[1] = 0.0 + # this_f_out[end] = 0.0 + # advection_matrix[1,:] .= 0.0 + # advection_matrix[1,1] = 1.0 + # advection_matrix[end,:] .= 0.0 + # advection_matrix[end,end] = 1.0 + + # if z.bc == "wall" + # if z.irank == 0 && iz == 1 + # # Set equal df/dt equal to f on points that should be set to zero for + # # boundary condition. The vector that the inverse of the advection matrix + # # acts on should have zeros there already. + # advection_matrix[icut_lower_z:end,icut_lower_z:end] .= 0.0 + # for i ∈ icut_lower_z:vpa.n + # advection_matrix[i,i] = 1.0 + # end + # end + # if z.irank == z.nrank - 1 && iz == z.n + # # Set equal df/dt equal to f on points that should be set to zero for + # # boundary condition. The vector that the inverse of the advection matrix + # # acts on should have zeros there already. + # # I comes from LinearAlgebra and represents identity matrix + # advection_matrix[1:icut_upper_z,1:icut_upper_z] .= 0.0 + # for i ∈ 1:icut_upper_z + # advection_matrix[i,i] = 1.0 + # end + # end + # end + + # advection_matrix = sparse(advection_matrix) + # nl_solver_params.preconditioners[ivperp,iz,ir,is] = lu(advection_matrix) + #end + + #function preconditioner(x) + # if isa(vpa_spectral, weak_discretization_info) + # # Multiply by mass matrix, storing result in vpa.scratch + # mul!(vpa.scratch, vpa_spectral.mass_matrix, x) + # end + + # # Handle boundary conditions + # enforce_v_boundary_condition_local!(vpa.scratch, vpa_bc, speed, vpa_diffusion, + # vpa, vpa_spectral) + + # if z.bc == "wall" + # # Wall boundary conditions. Note that as density, upar, ppar do not + # # change in this implicit step, f_new, f_old, and residual should all + # # be zero at exactly the same set of grid points, so it is reasonable + # # to zero-out `residual` to impose the boundary condition. We impose + # # this after subtracting f_old in case rounding errors, etc. 
mean that + # # at some point f_old had a different boundary condition cut-off + # # index. + # if z.irank == 0 && iz == 1 + # vpa.scratch[icut_lower_z:end] .= 0.0 +# # println("at icut_lower_z ", f_new[icut_lower_z], " ", f_old[icut_lower_z]) + # end + # # absolute velocity at right boundary + # if z.irank == z.nrank - 1 && iz == z.n + # vpa.scratch[1:icut_upper_z] .= 0.0 + # end + # end + # # Do LU application on vpa.scratch, storing result in x + # ldiv!(x, nl_solver_params.preconditioners[ivperp,iz,ir,is], vpa.scratch) + # return nothing + #end + left_preconditioner = identity + right_preconditioner = identity + #right_preconditioner = preconditioner + + # Define a function whose input is `f_new`, so that when its output + # `residual` is zero, f_new is the result of a backward-Euler timestep: + # (f_new - f_old) / dt = RHS(f_new) + # ⇒ f_new - f_old - dt*RHS(f_new) = 0 + function residual_func!(residual, f_new) + apply_bc!(f_new) + residual .= f_old + advance_f_local!(residual, f_new, vpa_advect[is], ivperp, iz, ir, vpa, dt, + vpa_spectral) + + if vpa_diffusion + second_derivative!(vpa.scratch, f_new, vpa, vpa_spectral) + @. residual += dt * vpa_dissipation_coefficient * vpa.scratch + end + + # Make sure updated f will not contain negative values + #@. residual = max(residual, minval) + + # Now + # residual = f_old + dt*RHS(f_new) + # so update to desired residual + @. residual = f_new - residual + + apply_bc!(residual) + end + + # Buffers + # Note vpa.scratch is used by advance_f!, so we cannot use it here.
+ residual = vpa.scratch2 + delta_x = vpa.scratch3 + rhs_delta = vpa.scratch4 + v = vpa.scratch5 + w = vpa.scratch6 + + # Use forward-Euler step for initial guess + # By passing this_f_out, which is equal to f_old at this point, the 'residual' + # is + # f_new - f_old - dt*RHS(f_old) = -dt*RHS(f_old) + # so to get a forward-Euler step we have to subtract this 'residual' + residual_func!(residual, this_f_out) + this_f_out .-= residual + + success = newton_solve!(this_f_out, residual_func!, residual, delta_x, + rhs_delta, v, w, nl_solver_params, coords=coords, + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner) + if !success + return success + end + end + end + + nl_solver_params.stage_counter[] += 1 + + return true +end + """ calculate the advection speed in the vpa-direction at each grid point """ diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl new file mode 100644 index 000000000..5c0b08dfe --- /dev/null +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -0,0 +1,289 @@ +module NonlinearSolverTests + +include("setup.jl") + +using moment_kinetics.array_allocation: allocate_float, allocate_shared_float +using moment_kinetics.communication +using moment_kinetics.coordinates: coordinate +using moment_kinetics.input_structs: advection_input +using moment_kinetics.looping +using moment_kinetics.looping: setup_loop_ranges! +using moment_kinetics.nonlinear_solvers +using moment_kinetics.type_definitions: mk_float, mk_int + +using MPI + +function linear_test() + println(" - linear test") + @testset "linear test $coord_names" for (coord_names, serial_solve) ∈ (((:z,), false), ((:vpa,), true)) + # Test represents constant-coefficient diffusion, in 1D steady state, with a + # central finite-difference discretisation of the second derivative. 
+ # + # Note, need to use newton_solve!() here even though it is a linear problem, + # because the inexact Jacobian-vector product we use in linear_solve!() means + # linear_solve!() on its own does not converge to the correct answer. + + n = 16 + restart = 8 + max_restarts = 1 + atol = 1.0e-10 + + irank_z, nrank_z, comm_sub_z, irank_r, nrank_r, comm_sub_r = + setup_distributed_memory_MPI(1, 1, 1, 1) + + setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=0, r=1, z=n, vperp=1, vpa=1, + vzeta=1, vr=1, vz=1) + + A = zeros(n,n) + i = 1 + A[i,i] = -2.0 + A[i,i+1] = 1.0 + for i ∈ 2:n-1 + A[i,i-1] = 1.0 + A[i,i] = -2.0 + A[i,i+1] = 1.0 + end + i = n + A[i,i-1] = 1.0 + A[i,i] = -2.0 + + z = collect(0:n-1) ./ (n-1) + b = @. - z * (1.0 - z) + + the_coord = coordinate("foo", n, n, n, 1, 1, 1, 0, 1.0, zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), + zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_int, 0, 0), + "", "", "", "", zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), + advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), + zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, + zeros(mk_float, 0), zeros(mk_float, 0), "", + zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0), + zeros(mk_float, 0, 0)) + coords = NamedTuple(c => the_coord for c ∈ coord_names) + + function rhs_func!(residual, x) + if serial_solve + residual .= A * x - b + else + begin_serial_region() + @serial_region begin + residual .= A * x - b + end + end + return nothing + end + + if serial_solve + x = allocate_float(n) + residual = allocate_float(n) + delta_x = allocate_float(n) + rhs_delta = allocate_float(n) + v = allocate_float(n) + w = allocate_float(n) + + x .= 0.0 + residual .= 0.0 + delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 
0.0 + w .= 0.0 + else + x = allocate_shared_float(n) + residual = allocate_shared_float(n) + delta_x = allocate_shared_float(n) + rhs_delta = allocate_shared_float(n) + v = allocate_shared_float(n) + w = allocate_shared_float(n) + + begin_serial_region() + @serial_region begin + x .= 0.0 + residual .= 0.0 + delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 0.0 + w .= 0.0 + end + end + + nl_solver_params = setup_nonlinear_solve( + Dict{String,Any}("nonlinear_solver" => + Dict{String,Any}("rtol" => 0.0, + "atol" => atol, + "linear_restart" => restart, + "linear_max_restarts" => max_restarts)), + coords; serial_solve=serial_solve) + + newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; + coords) + + if serial_solve + x_direct = A \ b + + @test isapprox(x, x_direct; atol=100.0*atol) + else + begin_serial_region() + @serial_region begin + x_direct = A \ b + + @test isapprox(x, x_direct; atol=100.0*atol) + end + end + end +end + +function nonlinear_test() + println(" - non-linear test") + @testset "non-linear test" for (coord_names, serial_solve) ∈ (((:z,), false), ((:vpa,), true)) + # Test represents diffusion with a solution-dependent coefficient D = |x|^2.5, in + # 1D steady state, with a central finite-difference discretisation of the second + # derivative. + # + # Unlike linear_test(), this is a genuinely non-linear problem, so the Newton + # iteration in newton_solve!() is required to converge to the solution. + + n = 16 + restart = 10 + max_restarts = 0 + atol = 1.0e-10 + + irank_z, nrank_z, comm_sub_z, irank_r, nrank_r, comm_sub_r = + setup_distributed_memory_MPI(1, 1, 1, 1) + + setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=0, r=1, z=n, vperp=1, vpa=1, + vzeta=1, vr=1, vz=1) + + z = collect(0:n-1) ./ (n-1) + b = @.
- z * (1.0 - z) + + the_coord = coordinate("foo", n, n, n, 1, 1, 1, 0, 1.0, zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), + zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_int, 0, 0), + "", "", "", "", zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), + advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), + zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, + zeros(mk_float, 0), zeros(mk_float, 0), "", + zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0), + zeros(mk_float, 0, 0)) + coords = NamedTuple(c => the_coord for c ∈ coord_names) + + function rhs_func!(residual, x) + if serial_solve + i = 1 + D = abs(x[i])^2.5 + residual[i] = D * (- 2.0 * x[i] + x[i+1]) - b[i] + for i ∈ 2:n-1 + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i] + x[i+1]) - b[i] + end + i = n + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i]) - b[i] + else + begin_serial_region() + @serial_region begin + i = 1 + D = abs(x[i])^2.5 + residual[i] = D * (- 2.0 * x[i] + x[i+1]) - b[i] + for i ∈ 2:n-1 + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i] + x[i+1]) - b[i] + end + i = n + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i]) - b[i] + end + end + return nothing + end + + if serial_solve + x = allocate_float(n) + residual = allocate_float(n) + delta_x = allocate_float(n) + rhs_delta = allocate_float(n) + v = allocate_float(n) + w = allocate_float(n) + else + x = allocate_shared_float(n) + residual = allocate_shared_float(n) + delta_x = allocate_shared_float(n) + rhs_delta = allocate_shared_float(n) + v = allocate_shared_float(n) + w = allocate_shared_float(n) + end + + if serial_solve + x .= 1.0 + residual .= 0.0 + delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 0.0 + w .= 0.0 + else + 
begin_serial_region() + @serial_region begin + x .= 1.0 + residual .= 0.0 + delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 0.0 + w .= 0.0 + end + end + + nl_solver_params = setup_nonlinear_solve( + Dict{String,Any}("nonlinear_solver" => + Dict{String,Any}("rtol" => 0.0, + "atol" => atol, + "linear_restart" => restart, + "linear_max_restarts" => max_restarts, + "nonlinear_max_iterations" => 100)), + coords; serial_solve=serial_solve) + + newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; + coords) + + rhs_func!(residual, x) + + if serial_solve + @test isapprox(residual, zeros(n); atol=4.0*atol) + else + begin_serial_region() + @serial_region begin + @test isapprox(residual, zeros(n); atol=4.0*atol) + end + end + end +end + +function runtests() + if Sys.isapple() + @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin + println("non-linear solver tests") + linear_test() + nonlinear_test() + end + else + @testset "non-linear solvers" begin + println("non-linear solver tests") + linear_test() + nonlinear_test() + end + end +end + +end # NonlinearSolverTests + +using .NonlinearSolverTests +NonlinearSolverTests.runtests() diff --git a/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl b/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl index cdee05ac9..d76a644f9 100644 --- a/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl +++ b/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl @@ -163,6 +163,11 @@ test_input_chebyshev = merge(test_input_finite_difference, "vz_ngrid" => 17, "vz_nelement" => 8)) +if global_size[] > 2 && global_size[] % 2 == 0 + # Test using distributed-memory + test_input_chebyshev["z_nelement_local"] = test_input_chebyshev["z_nelement"] ÷ 2 +end + test_input_chebyshev_split_1_moment = merge(test_input_chebyshev, Dict("run_name" => "chebyshev_pseudospectral_split_1_moment", diff --git 
a/moment_kinetics/test/nonlinear_sound_wave_tests.jl b/moment_kinetics/test/nonlinear_sound_wave_tests.jl index 0bb965494..590047000 100644 --- a/moment_kinetics/test/nonlinear_sound_wave_tests.jl +++ b/moment_kinetics/test/nonlinear_sound_wave_tests.jl @@ -6,12 +6,9 @@ using Base.Filesystem: tempname using moment_kinetics.coordinates: define_coordinate using moment_kinetics.input_structs: grid_input, advection_input -using moment_kinetics.load_data: open_readonly_output_file, load_coordinate_data, - load_species_data, load_fields_data, - load_ion_moments_data, load_pdf_data, - load_neutral_particle_moments_data, - load_neutral_pdf_data, load_time_data, load_species_data using moment_kinetics.interpolation: interpolate_to_grid_z, interpolate_to_grid_vpa +using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, + postproc_load_variable using moment_kinetics.type_definitions: mk_float const analytical_rtol = 3.e-2 @@ -79,35 +76,43 @@ function run_test(test_input, rtol, atol, upar_rtol=nothing; args...) 
# Load and analyse output ######################### - path = joinpath(realpath(input["base_directory"]), name, name) + path = joinpath(realpath(input["base_directory"]), name) - # open the netcdf file containing moments data and give it the handle 'fid' - fid = open_readonly_output_file(path, "moments") + # open the output file(s) + run_info = get_run_info_no_setup(path; dfns=true) # load species, time coordinate data - n_ion_species, n_neutral_species = load_species_data(fid) - ntime, time = load_time_data(fid) - n_ion_species, n_neutral_species = load_species_data(fid) + n_ion_species = run_info.composition.n_ion_species + n_neutral_species = run_info.composition.n_neutral_species + ntime = run_info.nt + time = run_info.time # load fields data - phi_zrt, Er_zrt, Ez_zrt = load_fields_data(fid) + phi_zrt = postproc_load_variable(run_info, "phi") + Er_zrt = postproc_load_variable(run_info, "Er") + Ez_zrt = postproc_load_variable(run_info, "Ez") # load velocity moments data - n_ion_zrst, upar_ion_zrst, ppar_ion_zrst, qpar_ion_zrst, v_t_ion_zrst = load_ion_moments_data(fid) - n_neutral_zrst, upar_neutral_zrst, ppar_neutral_zrst, qpar_neutral_zrst, v_t_neutral_zrst = load_neutral_particle_moments_data(fid) - z, z_spectral = load_coordinate_data(fid, "z") + n_ion_zrst = postproc_load_variable(run_info, "density") + upar_ion_zrst = postproc_load_variable(run_info, "parallel_flow") + ppar_ion_zrst = postproc_load_variable(run_info, "parallel_pressure") + qpar_ion_zrst = postproc_load_variable(run_info, "parallel_heat_flux") + v_t_ion_zrst = postproc_load_variable(run_info, "thermal_speed") + n_neutral_zrst = postproc_load_variable(run_info, "density_neutral") + upar_neutral_zrst = postproc_load_variable(run_info, "uz_neutral") + ppar_neutral_zrst = postproc_load_variable(run_info, "pz_neutral") + qpar_neutral_zrst = postproc_load_variable(run_info, "qz_neutral") + v_t_neutral_zrst = postproc_load_variable(run_info, "thermal_speed_neutral") + z = run_info.z + z_spectral = 
run_info.z_spectral - close(fid) - - # open the netcdf file containing pdf data - fid = open_readonly_output_file(path, "dfns") - # load particle distribution function (pdf) data - f_ion_vpavperpzrst = load_pdf_data(fid) - f_neutral_vzvrvzetazrst = load_neutral_pdf_data(fid) - vpa, vpa_spectral = load_coordinate_data(fid, "vpa") + f_ion_vpavperpzrst = postproc_load_variable(run_info, "f") + f_neutral_vzvrvzetazrst = postproc_load_variable(run_info, "f_neutral") + vpa = run_info.vpa + vpa_spectral = run_info.vpa_spectral - close(fid) + close_run_info(run_info) phi = phi_zrt[:,1,:] n_ion = n_ion_zrst[:,1,:,:] diff --git a/moment_kinetics/test/recycling_fraction_tests.jl b/moment_kinetics/test/recycling_fraction_tests.jl index 935ed968e..28a22c84f 100644 --- a/moment_kinetics/test/recycling_fraction_tests.jl +++ b/moment_kinetics/test/recycling_fraction_tests.jl @@ -12,10 +12,8 @@ using MPI using moment_kinetics.coordinates: define_coordinate using moment_kinetics.input_structs: grid_input, advection_input using moment_kinetics.interpolation: interpolate_to_grid_z -using moment_kinetics.load_data: open_readonly_output_file -using moment_kinetics.load_data: load_fields_data, - load_pdf_data, load_time_data, - load_species_data +using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, + postproc_load_variable # default inputs for tests test_input = Dict("n_ion_species" => 1, @@ -91,6 +89,10 @@ test_input = Dict("n_ion_species" => 1, "source_strength" => 2.0, "source_T" => 2.0)) +if global_size[] > 2 && global_size[] % 2 == 0 + # Test using distributed-memory + test_input["z_nelement_local"] = test_input["z_nelement"] ÷ 2 +end test_input_split1 = merge(test_input, Dict("run_name" => "split1", @@ -203,20 +205,15 @@ function run_test(test_input, expected_phi; rtol=4.e-14, atol=1.e-15, args...) 
# Load and analyse output ######################### - path = joinpath(realpath(input["base_directory"]), name, name) + path = joinpath(realpath(input["base_directory"]), name) - # open the netcdf file and give it the handle 'fid' - fid = open_readonly_output_file(path,"moments") + # open the output file(s) + run_info = get_run_info_no_setup(path) - # load species, time coordinate data - n_ion_species, n_neutral_species = load_species_data(fid) - ntime, time = load_time_data(fid) - n_ion_species, n_neutral_species = load_species_data(fid) - # load fields data - phi_zrt, Er_zrt, Ez_zrt = load_fields_data(fid) + phi_zrt = postproc_load_variable(run_info, "phi") - close(fid) + close_run_info(run_info) phi = phi_zrt[:,1,:] end diff --git a/moment_kinetics/test/runtests.jl b/moment_kinetics/test/runtests.jl index 1b78fca36..fa0f5d64f 100644 --- a/moment_kinetics/test/runtests.jl +++ b/moment_kinetics/test/runtests.jl @@ -7,6 +7,7 @@ function runtests() include(joinpath(@__DIR__, "calculus_tests.jl")) include(joinpath(@__DIR__, "interpolation_tests.jl")) include(joinpath(@__DIR__, "loop_setup_tests.jl")) + include(joinpath(@__DIR__, "nonlinear_solver_tests.jl")) include(joinpath(@__DIR__, "velocity_integral_tests.jl")) include(joinpath(@__DIR__, "sound_wave_tests.jl")) include(joinpath(@__DIR__, "nonlinear_sound_wave_tests.jl")) diff --git a/moment_kinetics/test/setup.jl b/moment_kinetics/test/setup.jl index 3bb9896ef..555824d00 100644 --- a/moment_kinetics/test/setup.jl +++ b/moment_kinetics/test/setup.jl @@ -13,9 +13,9 @@ using moment_kinetics module MKTestUtilities export use_verbose, force_optional_dependencies, @long, quietoutput, get_MPI_tempdir, - global_rank, maxabs_norm, @testset_skip + global_rank, global_size, maxabs_norm, @testset_skip -using moment_kinetics.communication: comm_world, global_rank +using moment_kinetics.communication: comm_world, global_rank, global_size using moment_kinetics.command_line_options: get_options using MPI diff --git 
a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl index 952444ea1..f4f3c366d 100644 --- a/util/calculate_rk_coeffs.jl +++ b/util/calculate_rk_coeffs.jl @@ -6,6 +6,115 @@ into ones that we can use. using Symbolics +# Following two functions copied and modified from Symbolics.jl's linear_algebra.jl so +# that we can hack them to force them to return a Rational{BigInt} result. +# Modifications: +# * Add prefix `my_` to the function names, to avoid confusion/conflicts +# * Change `Num.()` to `Rational{BigInt}.()` in `_my_solve` so that `A` and `b` are +# arrays of `Rational{BigInt}` (so that we avoid any rounding errors). For the case +# that we want, the entries of `A` and `b` are all numerical values (not actual +# symbolic expressions), so this hack can be done. +# * Change `/` to `//` in `my_sym_lu2()` +using Symbolics: linear_expansion, SymbolicUtils, value, sym_lu, Num, RCNum, _iszero, nterms +using LinearAlgebra +function my_solve_for(eq, var; simplify=false, check=true) # scalar case + # simplify defaults for `false` as canonicalization should handle most of + # the cases. + a, b, islinear = linear_expansion(eq, var) + check && @assert islinear + islinear || return nothing + # a * x + b = 0 + if eq isa AbstractArray && var isa AbstractArray + x = _my_solve(a, -b, simplify) + else + x = a \ -b + end + simplify || return x + if x isa AbstractArray + SymbolicUtils.simplify.(simplify_fractions.(x)) + else + SymbolicUtils.simplify(simplify_fractions(x)) + end +end + +function _my_solve(A::AbstractMatrix, b::AbstractArray, do_simplify) + #A = Num.(value.(SymbolicUtils.quick_cancel.(A))) + #b = Num.(value.(SymbolicUtils.quick_cancel.(b))) + A = Rational{BigInt}.(value.(SymbolicUtils.quick_cancel.(A))) + b = Rational{BigInt}.(value.(SymbolicUtils.quick_cancel.(b))) + sol = value.(sym_lu(A) \ b) + do_simplify ? 
SymbolicUtils.simplify_fractions.(sol) : sol +end + +function my_solve_for2(eq, var; simplify=false, check=true) # scalar case + # simplify defaults for `false` as canonicalization should handle most of + # the cases. + a, b, islinear = linear_expansion(eq, var) + check && @assert islinear + islinear || return nothing + # a * x + b = 0 + if eq isa AbstractArray && var isa AbstractArray + x = _my_solve2(a, -b, simplify) + else + x = a \ -b + end + simplify || return x + if x isa AbstractArray + SymbolicUtils.simplify.(simplify_fractions.(x)) + else + SymbolicUtils.simplify(simplify_fractions(x)) + end +end + +function _my_solve2(A::AbstractMatrix, b::AbstractArray, do_simplify) + A = Num.(value.(SymbolicUtils.quick_cancel.(A))) + b = Num.(value.(SymbolicUtils.quick_cancel.(b))) + sol = value.(my_sym_lu2(A) \ b) + do_simplify ? SymbolicUtils.simplify_fractions.(sol) : sol +end + +function my_sym_lu2(A; check=true) + SINGULAR = typemax(Int) + m, n = size(A) + F = map(x->x isa RCNum ? x : Num(x), A) + minmn = min(m, n) + p = Vector{LinearAlgebra.BlasInt}(undef, minmn) + info = 0 + for k = 1:minmn + kp = k + amin = SINGULAR + for i in k:m + absi = _iszero(F[i, k]) ? SINGULAR : nterms(F[i,k]) + if absi < amin + kp = i + amin = absi + end + end + + p[k] = kp + + if amin == SINGULAR && !(amin isa Symbolic) && (amin isa Number) && iszero(info) + info = k + end + + # swap + for j in 1:n + F[k, j], F[kp, j] = F[kp, j], F[k, j] + end + + for i in k+1:m + F[i, k] = F[i, k] // F[k, k] + end + for j = k+1:n + for i in k+1:m + F[i, j] = F[i, j] - F[i, k] * F[k, j] + end + end + end + check && LinearAlgebra.checknonsingular(info) + LU(F, p, convert(LinearAlgebra.BlasInt, info)) +end + """ convert_butcher_tableau_for_moment_kinetics(a, b) @@ -27,11 +136,15 @@ that can be used to calculate an error estimate. Currently assumes the method is explicit, so `a` has no non-zero diagonal or upper-triangular elements. 
-Returns an array `rk_coeffs` of size `n_rk_stages`x`n_rk_stages` where `size(a) = +Returns an array `rk_coefs` of size `n_rk_stages`x`n_rk_stages` where `size(a) = (n_rk_stages, n_rk_stages)`. """ -function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) - using_rationals = isa(a[1,1], Rational) +function convert_butcher_tableau_for_moment_kinetics(a, b, + a_implicit=zeros(size(a)), + b_implicit=zeros(size(b)); + low_storage=true) + using_rationals = eltype(a) <: Rational || eltype(b) <: Rational || eltype(a_implicit) <: Rational || eltype(b_implicit) <: Rational + imex = any(a_implicit .!= 0) n_rk_stages = size(a, 1) if size(b, 1) > 1 adaptive = true @@ -50,291 +163,573 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) # y_out are the same as y, but given as expressions in terms of y and f # k are the RHS evaluations as defined on the Wikipedia page # k_subs are the k evaluated in terms of y by back-substituting the definitions of y. - @variables y[1:n_rk_stages+1] y_out[1:n_rk_stages+1] k[1:n_rk_stages] k_subs[1:n_rk_stages] - y = Symbolics.scalarize(y) - y_out = Symbolics.scalarize(y_out) + @variables y_tilde[1:n_rk_stages+1] k[1:n_rk_stages] yn rk_coefs[1:n_rk_stages+1, 1:output_size] + @variables y[1:n_rk_stages] k_implicit[1:n_rk_stages] rk_coefs_implicit[1:n_rk_stages, 1:output_size+1] + y_tilde = Symbolics.scalarize(y_tilde) k = Symbolics.scalarize(k) - k_subs = Symbolics.scalarize(k_subs) + rk_coefs = Symbolics.scalarize(rk_coefs) + y = Symbolics.scalarize(y) + k_implicit = Symbolics.scalarize(k_implicit) + rk_coefs_implicit = Symbolics.scalarize(rk_coefs_implicit) + + # Expressions defined using the 'standard' Butcher formulae + y_tilde_k_expressions = [ + yn + (i == 1 ? 
0 : sum(a[i,j] * k[j] for j ∈ 1:i-1) + sum(a_implicit[i,j] * k_implicit[j] for j ∈ 1:i-1)) + for i ∈ 1:n_rk_stages + ] + # Note that when using an IMEX scheme, if a_implicit[i,i]==0, then k_implicit[i] is + # actually an explicit RHS evaluation (evaluated using y_tilde[i]), and the explicit + # RHS k[i] will be evaluated using y_tilde[i] instead of y[i] so that we can store + # (y_tilde[i] + k_implicit[i]) in y[i], as a way to have k_implicit[i] available. + implicit_coefficient_is_zero = [imex && a_implicit[i,i] == 0 for i ∈ 1:n_rk_stages] + y_k_expressions = [ + y_tilde_k_expressions[i] + (implicit_coefficient_is_zero[i] ? 1 : a_implicit[i,i]) * k_implicit[i] + for i ∈ 1:n_rk_stages + ] + # Final entry of y_tilde_k_expressions is y^(n+1) + push!(y_tilde_k_expressions, yn + + sum(b[1,i] * k[i] for i ∈ 1:n_rk_stages) + + sum(b_implicit[1,i] * k_implicit[i] for i ∈ 1:n_rk_stages)) - if using_rationals - k_subs[1] = (y[2] - y[1]) // a[2,1] + if adaptive + y_loworder = yn + + sum(b[2,i] * k[i] for i ∈ 1:n_rk_stages) + + sum(b_implicit[2,i] * k_implicit[i] for i ∈ 1:n_rk_stages) + end + + # Define expressions for y_tilde[i] using the rk_coefs as used in moment_kinetics + # Note that we need a special case for an imex scheme with some a_implicit[i,i]=0, as for those + # entries we hacked y[i] to allow k_implicit[i] to be saved, and we need to use + # y_tilde[i] as the starting point for the forward-Euler derivative instead of y[i]. + y_tilde_rk_coefs_expressions = [ + yn, # i=1 + (sum(rk_coefs[j,i-1] * y_tilde[j] for j ∈ 1:i-1) + + rk_coefs[i,i-1] * ((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1]) + + sum(rk_coefs_implicit[j,i] * y[j] for j ∈ 1:i-1) + for i ∈ 2:n_rk_stages+1)... + ] + # Note the 'implicit step' is treated specially, as the coefficient will be used to + # scale the timestep in the code, rather than as the coefficient of some version of + # y/y_tilde. rk_coefs_implicit[i,i] should end up being equal to a_implicit[i,i].
+ y_rk_coefs_expressions = [ + e + rk_coefs_implicit[i,i] * k_implicit[i] + for (i,e) ∈ enumerate(y_tilde_rk_coefs_expressions[1:n_rk_stages]) + ] + + # Substitute to eliminate y_tilde[i] from the expressions + y_tilde_rk_coefs_expressions = [ + substitute(e, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + for e ∈ y_tilde_rk_coefs_expressions + ] + y_rk_coefs_expressions = [ + substitute(e, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + for e ∈ y_rk_coefs_expressions + ] + + + # Substitute to eliminate y[i] from the expressions + y_tilde_rk_coefs_expressions = [ + substitute(e, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) + for e ∈ y_tilde_rk_coefs_expressions + ] + y_rk_coefs_expressions = [ + substitute(e, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) + for e ∈ y_rk_coefs_expressions + ] + + if adaptive + y_rk_coefs_err = sum(rk_coefs[j,n_rk_stages+1] * y_tilde[j] for j ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[j,n_rk_stages+2] * y[j] for j ∈ 1:n_rk_stages) + y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) + end + + # Construct equations that can be solved for rk_coefs entries by equating the + # coefficients of each k[i], k_implicit[i] in the two sets of expressions + rk_coefs_equations = [] + for (i, (rk_coefs_expr, Butcher_expr)) ∈ enumerate(zip(y_rk_coefs_expressions, y_k_expressions)) + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k_implicit[j]) + rhs = Symbolics.coeff(Butcher_expr, k_implicit[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end + if i == 1 + # EXplicit RK coefficients have no entries for i=1, because y_tilde[1]=yn + # always. 
+ continue + end + lhs = Symbolics.coeff(rk_coefs_expr, yn) + rhs = Symbolics.coeff(Butcher_expr, yn) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[1,i-1] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs) + end + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k[j]) + rhs = Symbolics.coeff(Butcher_expr, k[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[j+1,i-1] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end + end + + # Include contribution from y_tilde[n_rk_stages+1] + i = n_rk_stages + 1 + rk_coefs_expr = y_tilde_rk_coefs_expressions[n_rk_stages+1] + Butcher_expr = y_tilde_k_expressions[n_rk_stages+1] + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k_implicit[j]) + rhs = Symbolics.coeff(Butcher_expr, k_implicit[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end + lhs = Symbolics.coeff(rk_coefs_expr, yn) + rhs = Symbolics.coeff(Butcher_expr, yn) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[1,i-1] ~ 0) else - k_subs[1] = (y[2] - y[1]) / a[2,1] + push!(rk_coefs_equations, lhs ~ rhs) end - k_subs[1] = simplify(expand(k_subs[1])) - for i ∈ 2:n_rk_stages-1 - if using_rationals - k_subs[i] = (y[i+1] - y[1] - sum(a[i+1,j]*k_subs[j] for j ∈ 1:i-1)) // a[i+1,i] + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k[j]) + rhs = Symbolics.coeff(Butcher_expr, k[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[j+1,i-1] ~ 0) else - k_subs[i] = (y[i+1] - y[1] - sum(a[i+1,j]*k_subs[j] for j ∈ 1:i-1)) / a[i+1,i] + push!(rk_coefs_equations, lhs ~ rhs + 0) end - k_subs[i] = simplify(expand(k_subs[i])) end - y_out[1] = y[1] - y_out[2] = y[1] + a[2,1] * k[1] - 
y_out[2] = simplify(expand(y_out[2])) - for i ∈ 3:n_rk_stages - y_out[i] = y[1] + sum(a[i,j]*k_subs[j] for j ∈ 1:i-2) + a[i,i-1]*k[i-1] - y_out[i] = simplify(expand(y_out[i])) + if adaptive + i = n_rk_stages + 1 + lhs = Symbolics.coeff(y_rk_coefs_err, yn) + rhs = Symbolics.coeff(y_loworder, yn) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[1,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs) + end + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(y_rk_coefs_err, k[j]) + rhs = Symbolics.coeff(y_loworder, k[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[j+1,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end + i = n_rk_stages + 2 + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(y_rk_coefs_err, k_implicit[j]) + rhs = Symbolics.coeff(y_loworder, k_implicit[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end end - y_out[n_rk_stages+1] = y[1] + sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1) + - b[1,n_rk_stages]*k[n_rk_stages] - y_out[n_rk_stages+1] = simplify(expand(y_out[n_rk_stages+1])) + # Solve rk_coefs_equations for the rk_coefs entries if using_rationals - k_subs[n_rk_stages] = (y[n_rk_stages+1] - y[1] - - sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1)) // - b[1,n_rk_stages] + rk_coefs_values = my_solve_for(rk_coefs_equations, [rk_coefs..., rk_coefs_implicit...]) else - k_subs[n_rk_stages] = (y[n_rk_stages+1] - y[1] - - sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1)) / - b[1,n_rk_stages] - end - k_subs[n_rk_stages] = simplify(expand(k_subs[n_rk_stages])) - #println("y_out") - #for i ∈ 1:n_rk_stages+1 - # println(y_out[i]) - #end - #println("k") - #for i ∈ 1:n_rk_stages - # println(k_subs[i]) - #end + rk_coefs_values = Symbolics.solve_for(rk_coefs_equations, [rk_coefs..., rk_coefs_implicit...]) + end + 
rk_coefs_implicit_values = reshape(rk_coefs_values[(n_rk_stages+1)*output_size+1:end], n_rk_stages, output_size+1) + rk_coefs_values = reshape(rk_coefs_values[1:(n_rk_stages+1)*output_size], n_rk_stages+1, output_size) if low_storage if using_rationals - rk_coeffs = zeros(Rational{Int64}, 3, output_size) + rk_coefs_out = zeros(Rational{Int64}, 3, output_size) + rk_coefs_implicit_out = zeros(Rational{Int64}, 3, output_size+1) else - rk_coeffs = zeros(3, output_size) + rk_coefs_out = zeros(3, output_size) + rk_coefs_implicit_out = zeros(3, output_size+1) end for i in 1:n_rk_stages - k_coeff = Symbolics.coeff(y_out[i+1], k[i]) - if i == 1 j = i - rk_coeffs[1,i] = Symbolics.coeff(y_out[i+1], y[j]) - #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1]) - #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i]) - # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step' - rk_coeffs[1,i] -= k_coeff - - # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i]) - rk_coeffs[3,i] = k_coeff - #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i]) + rk_coefs_out[1,i] = rk_coefs_values[1,i] + rk_coefs_out[3,i] = rk_coefs_values[2,i] + for j ∈ 3:n_rk_stages+1 + if rk_coefs_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end else j = 1 - rk_coeffs[1,i] = Symbolics.coeff(y_out[i+1], y[j]) - for j ∈ 2:i-2 - if Symbolics.coeff(y_out[i+1], y[j]) != 0 + rk_coefs_out[1,i] = rk_coefs_values[1,i] + for j ∈ 2:i-1 + if rk_coefs_values[j,i] != 0 error("Found non-zero coefficient where zero was expected for low-storage coefficients") end end + rk_coefs_out[2,i] = rk_coefs_values[i,i] + rk_coefs_out[3,i] = rk_coefs_values[i+1,i] + for j ∈ i+2:n_rk_stages+1 + if rk_coefs_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + end + end + for i in 1:n_rk_stages + if i == 1 j = i - rk_coeffs[2,i] = Symbolics.coeff(y_out[i+1], y[j]) - 
#println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1]) - #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i]) - # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step' - rk_coeffs[2,i] -= k_coeff - - # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i]) - rk_coeffs[3,i] = k_coeff - #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i]) + rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i] + rk_coefs_implicit_out[3,i] = rk_coefs_implicit_values[2,i] + for j ∈ 3:n_rk_stages + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + else + j = 1 + rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i] + for j ∈ 2:i-1 + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + rk_coefs_implicit_out[2,i] = rk_coefs_implicit_values[i,i] + if i == n_rk_stages + rk_coefs_implicit_out[3,i] = 0 + else + rk_coefs_implicit_out[3,i] = rk_coefs_implicit_values[i+1,i] + end + for j ∈ i+2:n_rk_stages + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end end end - - #for i ∈ 1:n_rk_stages - # println("k$i = ", k_subs[i]) - #end if adaptive - error_coefficients = b[2,:] .- b[1,:] - #println("error_coefficients=", error_coefficients) - #println("error coefficients ", error_coefficients) - y_err = sum(error_coefficients[j]*k_subs[j] for j ∈ 1:n_rk_stages) - y_err = simplify(expand(y_err)) - - # Use final column of rk_coeffs to store the coefficients used to calculate the truncation - # error estimate + i = n_rk_stages+1 j = 1 - rk_coeffs[1,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) - for j ∈ 2:n_rk_stages-1 - if Symbolics.coeff(y_err, y[j]) != 0 - error("Found non-zero error coefficient where zero was expected for low-storage coefficients") + rk_coefs_out[1,i] = rk_coefs_values[1,i] 
+ for j ∈ 2:i-2 + if rk_coefs_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") end end - j = n_rk_stages - rk_coeffs[2,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) - j = n_rk_stages + 1 - rk_coeffs[3,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) - end - else - if using_rationals - rk_coeffs = zeros(Rational{Int64}, n_rk_stages+1, output_size) - else - rk_coeffs = zeros(n_rk_stages+1, output_size) - end - for i in 1:n_rk_stages - k_coeff = Symbolics.coeff(y_out[i+1], k[i]) + rk_coefs_out[2,i] = rk_coefs_values[i-1,i] + rk_coefs_out[3,i] = rk_coefs_values[i,i] - for j ∈ 1:i - rk_coeffs[j,i] = Symbolics.coeff(y_out[i+1], y[j]) - end - #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1]) - #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i]) - # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step' - rk_coeffs[i,i] -= k_coeff - - # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i]) - rk_coeffs[i+1,i] = k_coeff - #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i]) - end - - #for i ∈ 1:n_rk_stages - # println("k$i = ", k_subs[i]) - #end - if adaptive - error_coefficients = b[2,:] .- b[1,:] - #println("error_coefficients=", error_coefficients) - #println("error coefficients ", error_coefficients) - y_err = sum(error_coefficients[j]*k_subs[j] for j ∈ 1:n_rk_stages) - y_err = simplify(expand(y_err)) - - # Use final column of rk_coeffs to store the coefficients used to calculate the truncation - # error estimate - for j ∈ 1:n_rk_stages+1 - rk_coeffs[j,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) + j = 1 + rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i] + for j ∈ 2:i-2 + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end end + j = n_rk_stages + rk_coefs_implicit_out[2,i] = rk_coefs_implicit_values[j,i] + rk_coefs_implicit_out[3,i] = 0 #rk_coefs_implicit_values[j+1,i] end + else + 
rk_coefs_out = rk_coefs_values + rk_coefs_implicit_out = rk_coefs_implicit_values end - return rk_coeffs + return rk_coefs_out, rk_coefs_implicit_out, implicit_coefficient_is_zero +end +function convert_butcher_tableau_for_moment_kinetics(a::Matrix{Rational{Int64}}, + b::Matrix{Rational{Int64}}, + a_implicit::Matrix{Rational{Int64}}=zeros(Rational{Int64}, size(a)), + b_implicit::Matrix{Rational{Int64}}=zeros(Rational{Int64}, size(b)); + low_storage=true) + a = Matrix{Rational{BigInt}}(a) + b = Matrix{Rational{BigInt}}(b) + a_implicit = Matrix{Rational{BigInt}}(a_implicit) + b_implicit = Matrix{Rational{BigInt}}(b_implicit) + return convert_butcher_tableau_for_moment_kinetics(a, b, a_implicit, b_implicit; + low_storage=low_storage) end -function convert_rk_coeffs_to_butcher_tableau(rkcoeffs::AbstractArray{T,N}) where {T,N} - adaptive = (abs(sum(rkcoeffs[:,end])) < 1.0e-13) - low_storage = size(rkcoeffs, 1) == 3 +function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}, + adaptive, + rk_coefs_implicit=zeros(T, size(rk_coefs, 1) - 1, size(rk_coefs, 2) + 1), + implicit_coefficient_is_zero=nothing + ) where {T,N} + using_rationals = eltype(rk_coefs) <: Rational || eltype(rk_coefs_implicit) <: Rational + low_storage = size(rk_coefs, 1) == 3 if adaptive - n_rk_stages = size(rkcoeffs, 2) - 1 + n_rk_stages = size(rk_coefs, 2) - 1 else - n_rk_stages = size(rkcoeffs, 2) + n_rk_stages = size(rk_coefs, 2) + end + if implicit_coefficient_is_zero === nothing + implicit_coefficient_is_zero = zeros(Bool, n_rk_stages) end - @variables y[1:n_rk_stages+1] y_out[1:n_rk_stages+1] k[1:n_rk_stages] k_subs[1:n_rk_stages] - y = Symbolics.scalarize(y) + @variables y_tilde[1:n_rk_stages+1] yn k[1:n_rk_stages] + y_tilde = Symbolics.scalarize(y_tilde) k = Symbolics.scalarize(k) + @variables y[1:n_rk_stages] k_implicit[1:n_rk_stages] + y = Symbolics.scalarize(y) + k_implicit = Symbolics.scalarize(k_implicit) if low_storage - for i ∈ 1:n_rk_stages - y[i+1] = rkcoeffs[1,i]*y[1] + 
rkcoeffs[2,i]*y[i] + rkcoeffs[3,i]*(y[i] + k[i]) - end + y_tilde_expressions = [ + yn, + (rk_coefs[1,i-1]*y_tilde[1] + rk_coefs[2,i-1]*y_tilde[i-1] + + rk_coefs[3,i-1]*((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1]) + + rk_coefs_implicit[1,i]*y[1] + rk_coefs_implicit[2,i]*y[i-1] + for i ∈ 2:n_rk_stages+1)... + ] + y_expressions = [ + y_tilde_expressions[i] + rk_coefs_implicit[3,i] * k_implicit[i] + for i ∈ 1:n_rk_stages + ] else - for i ∈ 1:n_rk_stages - y[i+1] = sum(rkcoeffs[j,i]*y[j] for j ∈ 1:i) + rkcoeffs[i+1,i]*(y[i] + k[i]) - y[i+1] = simplify(expand(y[i+1])) + y_tilde_expressions = [ + yn, + (sum(rk_coefs[j,i-1]*y_tilde[j] for j ∈ 1:i-1) + + rk_coefs[i,i-1]*((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1]) + + sum(rk_coefs_implicit[j,i]*y[j] for j ∈ 1:i-1) + for i ∈ 2:n_rk_stages+1)... + ] + y_expressions = [ + y_tilde_expressions[i] + rk_coefs_implicit[i,i] * k_implicit[i] + for i ∈ 1:n_rk_stages + ] + end + y_tilde_expressions = [simplify(expand(e)) for e ∈ y_tilde_expressions] + y_expressions = [simplify(expand(e)) for e ∈ y_expressions] + if adaptive + if low_storage + i = n_rk_stages + 1 + y_loworder = rk_coefs[1,i]*y_tilde[1] + rk_coefs[2,i]*y_tilde[n_rk_stages] + rk_coefs[3,i]*y_tilde[n_rk_stages+1] + + rk_coefs_implicit[1,i+1]*y[1] + rk_coefs_implicit[2,i+1]*y[n_rk_stages-1] + rk_coefs_implicit[3,i+1]*y[n_rk_stages] + else + y_loworder = sum(rk_coefs[j,n_rk_stages+1]*y_tilde[j] for j ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[j,n_rk_stages+2]*y[j] for j ∈ 1:n_rk_stages) end + y_loworder = simplify(expand(y_loworder)) + end + + # Set up equations to solve for each y_tilde[i] and y[i] in terms of k[i] and + # k_impliti[i] + y_tilde_equations = [y_tilde[i] ~ y_tilde_expressions[i] for i ∈ 1:n_rk_stages+1] + y_equations = [y[i] ~ y_expressions[i] for i ∈ 1:n_rk_stages] + equations = vcat(y_tilde_equations, y_equations) + if using_rationals + expressions = my_solve_for2(equations, vcat(y_tilde, y)) + else + 
expressions = Symbolics.solve_for(equations, vcat(y_tilde, y)) end - #for i ∈ 1:n_rk_stages+1 - # println("i=$i, y[$i]=", y[i]) - #end + y_tilde_k_expressions = expressions[1:n_rk_stages+1] + y_k_expressions = expressions[n_rk_stages+2:end] if adaptive b = zeros(T, 2, n_rk_stages) + b_implicit = zeros(T, 2, n_rk_stages) else b = zeros(T, 1, n_rk_stages) + b_implicit = zeros(T, 1, n_rk_stages) end for j ∈ 1:n_rk_stages - b[1, j] = Symbolics.coeff(y[n_rk_stages+1], k[j]) + b[1, j] = Symbolics.coeff(y_tilde_k_expressions[n_rk_stages+1], k[j]) + b_implicit[1, j] = Symbolics.coeff(y_tilde_k_expressions[n_rk_stages+1], k_implicit[j]) end if adaptive - if low_storage - yerr = rkcoeffs[1,n_rk_stages+1]*y[1] + - rkcoeffs[2,n_rk_stages+1]*y[n_rk_stages] + - rkcoeffs[3,n_rk_stages+1]*y[n_rk_stages+1] - else - yerr = sum(rkcoeffs[j,n_rk_stages+1]*y[j] for j ∈ 1:n_rk_stages+1) - end - error_coeffs = zeros(T, n_rk_stages) + y_k_loworder = substitute(y_loworder, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + y_k_loworder = substitute(y_k_loworder, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) + y_k_loworder = simplify(expand(y_k_loworder)) for j ∈ 1:n_rk_stages - error_coeffs[j] = Symbolics.coeff(yerr, k[j]) + b[2,j] = Symbolics.coeff(y_k_loworder, k[j]) + b_implicit[2,j] = Symbolics.coeff(y_k_loworder, k_implicit[j]) end - #println("error_coeffs=", error_coeffs) - # b[2,:] is the lower-order solution - @. 
b[2,:] = error_coeffs + b[1,:] end a = zeros(T, n_rk_stages, n_rk_stages) + a_implicit = zeros(T, n_rk_stages, n_rk_stages) for i ∈ 1:n_rk_stages for j ∈ 1:n_rk_stages - a[i,j] = Symbolics.coeff(y[i], k[j]) + a[i,j] = Symbolics.coeff(y_k_expressions[i], k[j]) + if j == i && implicit_coefficient_is_zero[i] + a_implicit[i,j] = 0 + else + a_implicit[i,j] = Symbolics.coeff(y_k_expressions[i], k_implicit[j]) + end end end - return a, b + return a, b, a_implicit, b_implicit end -function convert_and_check_butcher_tableau(name, a, b; low_storage=true) +function convert_and_check_butcher_tableau(name, a, b, + a_implicit=zeros(eltype(a), size(a)), + b_implicit=zeros(eltype(b), size(b)); + low_storage=true) + imex = any(a_implicit .!= 0) || any(b_implicit .!= 0) + println(name) - rk_coeffs = convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=low_storage) + rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero = + convert_butcher_tableau_for_moment_kinetics(a, b, a_implicit, b_implicit; + low_storage=low_storage) print("a="); display(a) print("b="); display(b) - print("rk_coeffs="); display(rk_coeffs) + if imex + print("a_implicit="); display(a_implicit) + print("b_implicit="); display(b_implicit) + end + print("rk_coefs="); display(rk_coefs) + if imex + print("rk_coefs_implicit="); display(rk_coefs_implicit) + end + print("rk_coefs(Float64)="); display(Float64.(rk_coefs)) + if imex + print("rk_coefs_implicit(Float64)="); display(Float64.(rk_coefs_implicit)) + end println("a=$a") println("b=$b") - println("rk_coeffs=$rk_coeffs") + if imex + println("a_implicit=$a_implicit") + println("b_implicit=$b_implicit") + end + println("rk_coefs=$rk_coefs") + if imex + println("rk_coefs_implicit=$rk_coefs_implicit") + println("implicit_coefficient_is_zero=$implicit_coefficient_is_zero") + end println() - check_end = size(rk_coeffs, 2) + check_end = size(rk_coefs, 2) if size(b, 1) > 1 # Adaptive timestep - if abs(sum(rk_coeffs[:,end])) > 1.0e-13 - error("Sum of error 
coefficients should be 0") + error_sum = sum(rk_coefs[:,end]) + sum(rk_coefs_implicit[:,end]) + if abs(error_sum - 1) > 1.0e-13 + error("Sum of loworder coefficients should be 1. Got ", error_sum, " ≈ ", Float64(error_sum)) end check_end -= 1 + adaptive = true + else + adaptive = false end for i ∈ 1:check_end - if abs(sum(rk_coeffs[:,i]) - 1) > 1.0e-13 - error("Sum of RK coefficients should be 1 for each stage") + if low_storage + error_sum = sum(rk_coefs[:,i]) + sum(rk_coefs_implicit[:,i+1]) + else + error_sum = sum(rk_coefs[:,i]) + sum(rk_coefs_implicit[1:i,i+1]) + end + if abs(error_sum - 1) > 1.0e-13 + error("Sum of RK coefficients should be 1 for each stage. Got ", error_sum, " ≈ ", Float64(error_sum)) + end + end + if imex + check_end_implicit = size(rk_coefs_implicit, 2) + if size(b_implicit, 1) > 1 + # Adaptive timestep + check_end_implicit -= 1 + end + for i ∈ 1:check_end_implicit - 1 + if !all(abs.(rk_coefs_implicit[i+1:end,i]) .< 1.0e-13) + error("Implicit RK coefficients should be 0 for j>i. Got ", rk_coefs_implicit[i+1:end,i], " ≈ ", Float64.(rk_coefs_implicit[i+1:end,i])) + end + end + for i ∈ 1:check_end_implicit - 1 + if a_implicit[i,i] == 0 + if rk_coefs_implicit[i,i] != 1 + error("Diagonal RK coefficient should be 1 when a_implicit[$i,$i]=0, got rk_coefs_implicit[$i,$i]=", rk_coefs_implicit[i,i]) + end + elseif abs(rk_coefs_implicit[i,i] - a_implicit[i,i]) > 1.0e-13 + error("Diagonal RK coefficient should be equal to a_implicit[i,i] for each stage. Got rk_coefs_implicit[$i,$i]=", rk_coefs_implicit[i,i] - a_implicit[i,i], " a_implicit[$i,$i]=", a_implicit[i,i]) + end end end # Consistency check: converting back should give the original a, b. - a_check, b_check = convert_rk_coeffs_to_butcher_tableau(rk_coeffs) - #println("check?? 
", a_check, " ", b_check) + a_check, b_check, a_check_implicit, b_check_implicit = + convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) - if isa(a[1], Real) - if maximum(abs.(a_check .- a)) > 1.0e-13 - error("Converting rk_coeffs back to Butcher tableau gives different 'a':\n" + if eltype(a) == Rational + if a_check != a + error("Converting rk_coefs back to Butcher tableau gives different 'a':\n" * "Original: $a\n" * "New: $a_check") end - if maximum(abs.(b_check .- b)) > 1.0e-13 - error("Converting rk_coeffs back to Butcher tableau gives different 'b':\n" + if b_check != b + error("Converting rk_coefs back to Butcher tableau gives different 'b':\n" * "Original: $b\n" * "New: $b_check") end else - if a_check != a - error("Converting rk_coeffs back to Butcher tableau gives different 'a':\n" + if maximum(abs.(a_check .- a)) > 1.0e-13 + error("Converting rk_coefs back to Butcher tableau gives different 'a':\n" * "Original: $a\n" * "New: $a_check") end - if b_check != b - error("Converting rk_coeffs back to Butcher tableau gives different 'b':\n" + if maximum(abs.(b_check .- b)) > 1.0e-13 + error("Converting rk_coefs back to Butcher tableau gives different 'b':\n" * "Original: $b\n" * "New: $b_check") end end + if eltype(a_implicit) == Rational + if a_check_implicit != a_implicit + error("Converting rk_coefs back to Butcher tableau gives different 'a_implicit':\n" + * "Original: $a_implicit\n" + * "New: $a_check_implicit") + end + if b_check_implicit != b_implicit + error("Converting rk_coefs back to Butcher tableau gives different 'b_implicit':\n" + * "Original: $b_implicit\n" + * "New: $b_check_implicit") + end + else + if maximum(abs.(a_check_implicit .- a_implicit)) > 1.0e-13 + error("Converting rk_coefs back to Butcher tableau gives different 'a_implicit':\n" + * "Original: $a_implicit\n" + * "New: $a_check_implicit") + end + if maximum(abs.(b_check_implicit .- b_implicit)) > 1.0e-13 + error("Converting 
rk_coefs back to Butcher tableau gives different 'b_implicit':\n" + * "Original: $b_implicit\n" + * "New: $b_check_implicit") + end + end end -function convert_and_check_rk_coeffs(name, rk_coeffs) +function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, + rk_coefs_implicit=zeros(eltype(rk_coefs), + size(rk_coefs, 1), + size(rk_coefs, 2) + 1), + implicit_coefficient_is_zero=nothing) + imex = any(rk_coefs_implicit .!= 0) + println(name) - print("rk_coeffs="); display(rk_coeffs) - a, b = convert_rk_coeffs_to_butcher_tableau(rk_coeffs) + print("rk_coefs="); display(rk_coefs) + if imex + print("rk_coefs_implicit="); display(rk_coefs_implicit) + end + a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) print("a="); display(a) print("b="); display(b) + if imex + print("a_implicit="); display(a_implicit) + print("b_implicit="); display(b_implicit) + end println("a=$a") println("b=$b") + if imex + println("a_implicit=$a_implicit") + println("b_implicit=$b_implicit") + end println() end @@ -514,6 +909,12 @@ convert_and_check_butcher_tableau( construct_fekete_3rd_order(4)... ) +convert_and_check_butcher_tableau( + "Fekete 4(3) not low-storage", + construct_fekete_3rd_order(4)...; + low_storage=false + ) + """ construct_fekete_2nd_order(nstage) @@ -561,7 +962,7 @@ convert_and_check_butcher_tableau( construct_fekete_2nd_order(2)... 
) -convert_and_check_rk_coeffs( +convert_and_check_rk_coefs( "mk's ssprk4", [1//2 0 2//3 0 ; 1//2 1//2 0 0 ; @@ -570,7 +971,7 @@ convert_and_check_rk_coeffs( 0 0 0 1//2], ) -convert_and_check_rk_coeffs( +convert_and_check_rk_coefs( "mk's ssprk3", [0 3//4 1//3; 1 0 0 ; @@ -578,9 +979,90 @@ convert_and_check_rk_coeffs( 0 0 2//3], ) -convert_and_check_rk_coeffs( +convert_and_check_rk_coefs( "mk's ssprk2", [0 1//2; 0 0 ; 1 1//2], ) + +println("\n\nIMEX methods\n============\n") + +# 4th-order, 7-stage IMEX method 'ARK4(3)7L[2]SA₁' from Kennedy & Carpenter 2019 +# (https://doi.org/10.1016/j.apnum.2018.10.007) +convert_and_check_butcher_tableau( + "KennedyCarpenterARK437", + Rational{BigInt}[0 0 0 0 0 0 0; + 247//1000 0 0 0 0 0 0; + 247//4000 2694949928731//7487940209513 0 0 0 0 0; + 464650059369//8764239774964 878889893998//2444806327765 -952945855348//12294611323341 0 0 0 0; + 476636172619//8159180917465 -1271469283451//7793814740893 -859560642026//4356155882851 1723805262919//4571918432560 0 0 0; + 6338158500785//11769362343261 -4970555480458//10924838743837 3326578051521//2647936831840 -880713585975//1841400956686 -1428733748635//8843423958496 0 0; + 760814592956//3276306540349 760814592956//3276306540349 -47223648122716//6934462133451 71187472546993//9669769126921 -13330509492149//9695768672337 11565764226357//8513123442827 0; + ], + Rational{BigInt}[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000 ; + 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + Rational{BigInt}[0 0 0 0 0 0 0 ; + 1235//10000 1235//10000 0 0 0 0 0 ; + 624185399699//4186980696204 624185399699//4186980696204 1235//10000 0 0 0 0 ; + 1258591069120//10082082980243 1258591069120//10082082980243 -322722984531//8455138723562 1235//10000 0 0 0 ; + -436103496990//5971407786587 -436103496990//5971407786587 -2689175662187//11046760208243 
4431412449334//12995360898505 1235//10000 0 0 ; + -2207373168298//14430576638973 -2207373168298//14430576638973 242511121179//3358618340039 3145666661981//7780404714551 5882073923981//14490790706663 1235//10000 0 ; + 0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 1235//10000; + ], + Rational{BigInt}[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000 ; + 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + ; low_storage=false) + +# The 5th order KennedyCarpenter548 method seems to be missing the 8'th row of a_implicit +# coefficients in the Kennedy&Carpenter2019 paper, so this is not correct. +## 5th-order, 8-stage IMEX method 'ARK5(4)8L[2]SA₂' from Kennedy & Carpenter 2019 +## (https://doi.org/10.1016/j.apnum.2018.10.007) +#convert_and_check_butcher_tableau( +# "KennedyCarpenterARK548", +# Rational{BigInt}[ 0 0 0 0 0 0 0 0; +# 4//9 0 0 0 0 0 0 0; +# 1//9 1183333538310//1827251437969 0 0 0 0 0 0; +# 895379019517//9750411845327 477606656805//13473228687314 -112564739183//9373365219272 0 0 0 0 0; +# -4458043123994//13015289567637 -2500665203865//9342069639922 983347055801//8893519644487 2185051477207//2551468980502 0 0 0 0; +# -167316361917//17121522574472 1605541814917//7619724128744 991021770328//13052792161721 2342280609577//11279663441611 3012424348531//12792462456678 0 0 0; +# 6680998715867//14310383562358 5029118570809//3897454228471 2415062538259//6382199904604 -3924368632305//6964820224454 -4331110370267//15021686902756 -3944303808049//11994238218192 0 0; +# 2193717860234//3570523412979 2193717860234//3570523412979 5952760925747//18750164281544 -4412967128996//6196664114337 4151782504231//36106512998704 572599549169//6265429158920 -457874356192//11306498036315 0; +# ], +# Rational{BigInt}[ 0 0 3517720773327//20256071687669 
4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9 ; +# 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926; +# ], +# Rational{BigInt}[ 0 0 0 0 0 0 0 0 ; +# 2//9 2//9 0 0 0 0 0 0 ; +# 2366667076620//8822750406821 2366667076620//8822750406821 2//9 0 0 0 0 0 ; +# -257962897183//4451812247028 -257962897183//4451812247028 128530224461//14379561246022 2//9 0 0 0 0 ; +# -486229321650//11227943450093 -486229321650//11227943450093 -225633144460//6633558740617 1741320951451//6824444397158 2//9 0 0 0 ; +# 621307788657//4714163060173 621307788657//4714163060173 -125196015625//3866852212004 940440206406//7593089888465 961109811699//6734810228204 2//9 0 0 ; +# 2036305566805//6583108094622 2036305566805//6583108094622 -3039402635899//4450598839912 -1829510709469//31102090912115 -286320471013//6931253422520 8651533662697//9642993110008 2//9 0 ; +# 0 0 0 0 0 0 0 2//9; +# ], +# Rational{BigInt}[ 0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9 ; +# 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926; +# ], +# ; low_storage=false) + +# 3rd-order, 4-stage IMEX method from Kennedy & Carpenter 2003 +# (https://doi.org/10.1016/S0168-9274(02)00138-1, +# https://ntrs.nasa.gov/api/citations/20010075154/downloads/20010075154.pdf) +convert_and_check_butcher_tableau( + "KennedyCarpenterARK324", + Rational{BigInt}[0 0 0 0; + 1767732205903//2027836641118 0 0 0; + 5535828885825//10492691773637 788022342437//10882634858940 0 0; + 6485989280629//16251701735622 -4246266847089//9704473918619 10755448449292//10357097424841 0; + ], + 
Rational{BigInt}[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; + 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], + Rational{BigInt}[0 0 0 0 ; + 1767732205903//4055673282236 1767732205903//4055673282236 0 0 ; + 2746238789719//10658868560708 -640167445237//6845629431997 1767732205903//4055673282236 0 ; + 1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; + ], + Rational{BigInt}[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; + 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], + ; low_storage=false) diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl index 5ca7add5a..dd77ab06f 100644 --- a/util/test-rk-timestep.jl +++ b/util/test-rk-timestep.jl @@ -6,19 +6,34 @@ function f(y) return y #return 1.0 end +function f_implicit(y, dt) + # Calculate derivative at the end of a 'backward Euler' step so that + # (y_out - y)/dt = f(y_out) + # ⇒ y_out - y = dt * f(y_out) = dt * y_out + # ⇒ y_out = y / (1 - dt) + return f(y/(1 - dt)) +end +function backward_euler(y, dt) + # Do a 'backward Euler' solve so that + # (y_out - y)/dt = f(y_out) + # ⇒ y_out - y = dt * f(y_out) = dt * y_out + # ⇒ y_out = y / (1 - dt) + return y / (1 - dt) +end y0 = 1.0 nsteps = 100 * multiplier t = [i*dt for i ∈ 0:nsteps] analytic = @. y0*exp(t) +analytic_implicit = @. y0*exp(2*t) #analytic = @. 
1.0 + t -function rk_advance(rk_coeffs, y0, dt, nsteps) - n_rk_stages = size(rk_coeffs, 1) - 1 - #println("n_rk_stages=$n_rk_stages, ", size(rk_coeffs)) +function rk_advance_explicit(rk_coefs, y0, dt, nsteps) + n_rk_stages = size(rk_coefs, 1) - 1 + #println("n_rk_stages=$n_rk_stages, ", size(rk_coefs)) yscratch = zeros(n_rk_stages + 1) yscratch[1] = y0 - adaptive = size(rk_coeffs, 2) > n_rk_stages + adaptive = size(rk_coefs, 2) > n_rk_stages result = zeros(nsteps+1) result[1] = y0 @@ -28,8 +43,58 @@ function rk_advance(rk_coeffs, y0, dt, nsteps) for it ∈ 1:nsteps for istage ∈ 1:n_rk_stages yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage]) - this_coeffs = rk_coeffs[:,istage] - yscratch[istage+1] = sum(this_coeffs[i]*yscratch[i] for i ∈ 1:istage+1) + this_coefs = rk_coefs[:,istage] + yscratch[istage+1] = sum(this_coefs[i]*yscratch[i] for i ∈ 1:istage+1) + end + #k1 = 2*(yscratch[2] - yscratch[1]) + #k2 = 2*(yscratch[3] - yscratch[1]) + #k3 = yscratch[4] - yscratch[1] + #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3 + #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4) + if adaptive + loworder = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + error[it+1] = loworder - yscratch[end] + end + yscratch[1] = yscratch[end] + result[it+1] = yscratch[end] + end + + return result, error +end + +function rk_advance(rk_coefs, y0, dt, nsteps, rk_coefs_implicit=nothing, implicit_coefficient_is_zero=nothing) + + n_rk_stages = size(rk_coefs, 1) - 1 + + if rk_coefs_implicit === nothing && implicit_coefficient_is_zero === nothing + rk_coefs_implicit = zeros(n_rk_stages, n_rk_stages + 2) + implicit_coefficient_is_zero = zeros(Bool, n_rk_stages) + end + + #println("n_rk_stages=$n_rk_stages, ", size(rk_coefs)) + yscratch = zeros(n_rk_stages + 1) + yscratch_implicit = zeros(n_rk_stages) + yscratch[1] = y0 + adaptive = size(rk_coefs, 2) > n_rk_stages + + result = zeros(nsteps+1) + result[1] = y0 + + error = zeros(nsteps+1) + + for it ∈ 1:nsteps + 
for istage ∈ 1:n_rk_stages + if implicit_coefficient_is_zero[istage] + yscratch_implicit[istage] = yscratch[istage] + dt*f_implicit(yscratch[istage], 0.0) + yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage]) + else + yscratch_implicit[istage] = backward_euler(yscratch[istage], dt*rk_coefs_implicit[istage,istage]) + yscratch[istage+1] = yscratch_implicit[istage] + dt*f(yscratch_implicit[istage]) + end + this_coefs = rk_coefs[:,istage] + this_coefs_implicit = rk_coefs_implicit[:,istage+1] + yscratch[istage+1] = sum(this_coefs[i]*yscratch[i] for i ∈ 1:istage+1) + + sum(this_coefs_implicit[i]*yscratch_implicit[i] for i ∈ 1:istage) end #k1 = 2*(yscratch[2] - yscratch[1]) #k2 = 2*(yscratch[3] - yscratch[1]) @@ -37,7 +102,9 @@ function rk_advance(rk_coeffs, y0, dt, nsteps) #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3 #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4) if adaptive - error[it+1] = sum(rk_coeffs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + loworder = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[i, n_rk_stages+2]*yscratch_implicit[i] for i ∈ 1:n_rk_stages) + error[it+1] = loworder - yscratch[end] end yscratch[1] = yscratch[end] result[it+1] = yscratch[end] @@ -46,8 +113,8 @@ function rk_advance(rk_coeffs, y0, dt, nsteps) return result, error end -function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps) - n_rk_stages = size(rk_coeffs, 2) +function rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps) + n_rk_stages = size(rk_coefs, 2) println("check n_rk_stages=$n_rk_stages") yscratch = zeros(n_rk_stages + 1) @@ -59,9 +126,9 @@ function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps) for it ∈ 1:nsteps for istage ∈ 1:n_rk_stages yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage]) - this_coeffs = rk_coeffs[:,istage] - #println("istage=$istage, this_coeffs=$this_coeffs") - yscratch[istage+1] = this_coeffs[1]*yscratch[1] + this_coeffs[2]*yscratch[istage] + 
this_coeffs[3]*yscratch[istage+1] + this_coefs = rk_coefs[:,istage] + #println("istage=$istage, this_coefs=$this_coefs") + yscratch[istage+1] = this_coefs[1]*yscratch[1] + this_coefs[2]*yscratch[istage] + this_coefs[3]*yscratch[istage+1] #println("istage=$istage, ", yscratch[istage+1]) end #println("before yscratch=$yscratch") @@ -73,7 +140,7 @@ function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps) return result end -function rk_advance_butcher(a, b, y0, dt, nsteps) +function rk_advance_butcher_explicit(a, b, y0, dt, nsteps) n_rk_stages = size(a, 2) kscratch = zeros(n_rk_stages) y = y0 @@ -105,6 +172,55 @@ function rk_advance_butcher(a, b, y0, dt, nsteps) return result, error end +function rk_advance_butcher(a, b, y0, dt, nsteps, a_implicit=nothing, b_implicit=nothing) + n_rk_stages = size(a, 2) + + if a_implicit === nothing && b_implicit === nothing + a_implicit = zeros(n_rk_stages, n_rk_stages) + b_implicit = zeros(size(b)) + end + + kscratch = zeros(n_rk_stages) + kscratch_implicit = zeros(n_rk_stages) + y = y0 + if ndims(b) == 1 + b = b' + end + adaptive = size(b, 1) > 1 + + result = zeros(nsteps+1) + result[1] = y0 + + error = zeros(nsteps+1) + + for it ∈ 1:nsteps + kscratch[1] = dt*f(y) + kscratch_implicit[1] = dt*f_implicit(y, a_implicit[1,1] * dt) + for i ∈ 2:n_rk_stages + ytilde = y + + sum(a[i,j] * kscratch[j] for j ∈ 1:i-1) + + sum(a_implicit[i,j] * kscratch_implicit[j] for j ∈ 1:i-1) + ystage = backward_euler(ytilde, dt * a_implicit[i,i]) + kscratch_implicit[i] = dt*f_implicit(ytilde, dt * a_implicit[i,i]) + kscratch[i] = dt*f(ystage) + end + if adaptive + y_loworder = y + + sum(b[2,j]*kscratch[j] for j ∈ 1:n_rk_stages) + + sum(b_implicit[2,j]*kscratch_implicit[j] for j ∈ 1:n_rk_stages) + end + y = y + + sum(b[1,j]*kscratch[j] for j ∈ 1:n_rk_stages) + + sum(b_implicit[1,j]*kscratch_implicit[j] for j ∈ 1:n_rk_stages) + if adaptive + error[it+1] = y_loworder - y + end + result[it+1] = y + end + + return result, error +end + function rk4_by_hand(y0, 
dt, nsteps) result = zeros(nsteps+1) y = y0 @@ -122,31 +238,31 @@ function rk4_by_hand(y0, dt, nsteps) end methods = Dict( - "SSPRK3" => (rk_coeffs=Float64[0 3//4 1//3; 1 0 0; 0 1//4 0; 0 0 2//3], + "SSPRK3" => (rk_coefs=Float64[0 3//4 1//3; 1 0 0; 0 1//4 0; 0 0 2//3], a=Float64[0 0 0; 1 0 0; 1//4 1//4 0], b=Float64[1//6 1//6 2//3]), - "RK4" => (rk_coeffs = Float64[1//2 1 1 -1//3; 1//2 -1//2 0 1//3; 0 1//2 -1 2//3; 0 0 1 1//6; 0 0 0 1//6], + "RK4" => (rk_coefs = Float64[1//2 1 1 -1//3; 1//2 -1//2 0 1//3; 0 1//2 -1 2//3; 0 0 1 1//6; 0 0 0 1//6], a = Float64[0 0 0 0; 1//2 0 0 0; 0 1//2 0 0; 0 0 1 0], b = Float64[1//6 1//3 1//3 1//6]), - "RKF45" => (rk_coeffs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 -1], + "RKF45" => (rk_coefs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 0], a = Float64[0 0 0 0 0 0; 1//4 0 0 0 0 0; 3//32 9//32 0 0 0 0; 1932//2197 -7200//2197 7296//2197 0 0 0; 439//216 -8 3680//513 -845//4104 0 0; -8//27 2 -3544//2565 1859//4104 -11//40 0], b = Float64[16//135 0 6656//12825 28561//56430 -9//50 2//55; 25//216 0 1408//2565 2197//4104 -1//5 0]), - "RKF45 truncated" => (rk_coeffs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980; 1//4 3//32 17328//2197 95//54 33//10 232//165; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836; 0 0 0 -845//4104 -77//40 -56//55; 0 0 0 0 -11//40 34//55; 0 0 0 0 0 2//55], + "RKF45 truncated" => 
(rk_coefs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980; 1//4 3//32 17328//2197 95//54 33//10 232//165; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836; 0 0 0 -845//4104 -77//40 -56//55; 0 0 0 0 -11//40 34//55; 0 0 0 0 0 2//55], a = Float64[0 0 0 0 0 0; 1//4 0 0 0 0 0; 3//32 9//32 0 0 0 0; 1932//2197 -7200//2197 7296//2197 0 0 0; 439//216 -8 3680//513 -845//4104 0 0; -8//27 2 -3544//2565 1859//4104 -11//40 0], b = Float64[16//135 0 6656//12825 28561//56430 -9//50 2//55]), - "Heun SSPRK2" => (rk_coeffs = Float64[0 1//2; 1 0; 0 1//2], + "Heun SSPRK2" => (rk_coefs = Float64[0 1//2; 1 0; 0 1//2], a = Float64[0 0; 1 0], b = Float64[1//2 1//2]), - "Gottlieb 43" => (rk_coeffs = Float64[0 1//2 2//3; 1 0 0; 0 1//2 -1//3; 0 0 2//3], + "Gottlieb 43" => (rk_coefs = Float64[0 1//2 2//3; 1 0 0; 0 1//2 -1//3; 0 0 2//3], a = Float64[0 0 0; 1 0 0; 1//2 1//2 0], b = Float64[1//6 1//6 2//3]), - "mk ssprk3" => (rk_coeffs = Float64[1//2 0 2//3 0 ; + "mk ssprk3" => (rk_coefs = Float64[1//2 0 2//3 0 ; 1//2 1//2 0 0 ; 0 1//2 1//6 0 ; 0 0 1//6 1//2; @@ -154,50 +270,91 @@ methods = Dict( a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2]), - "mk ssprk2" => (rk_coeffs = Float64[0.0 0.5 0.0; - 1.0 0.0 0.0; - 0.0 0.5 0.0], + "mk ssprk2" => (rk_coefs = Float64[0.0 0.5; + 1.0 0.0; + 0.0 0.5], a = Float64[0.0 0.0; 1.0 0.0], b = Float64[0.5 0.5; 0.5 0.5]), - "Fekete 43" => (rk_coeffs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 -1//2], + "Fekete 43" => (rk_coefs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 1//2], a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2; 1//4 1//4 1//4 1//4]), - "Fekete 43 truncated" => (rk_coeffs = Float64[1//2 0 2//3 0; 1//2 1//2 0 0; 0 1//2 1//6 0; 0 0 1//6 1//2; 0 0 0 1//2], + "Fekete 43 truncated" => 
(rk_coefs = Float64[1//2 0 2//3 0; 1//2 1//2 0 0; 0 1//2 1//6 0; 0 0 1//6 1//2; 0 0 0 1//2], a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2]), - "Fekete 42" => (rk_coeffs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 -1//4], + "Fekete 42" => (rk_coefs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 3//4], a = Float64[0 0 0 0; 1//3 0 0 0; 1//3 1//3 0 0; 1//3 1//3 1//3 0], b = Float64[1//4 1//4 1//4 1//4; 5//16 1//4 1//4 3//16]), - "Fekete 10,4" => (rk_coeffs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 -1], + "Fekete 10,4" => (rk_coefs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 0], a = Float64[0 0 0 0 0 0 0 0 0 0; 1//6 0 0 0 0 0 0 0 0 0; 1//6 1//6 0 0 0 0 0 0 0 0; 1//6 1//6 1//6 0 0 0 0 0 0 0; 1//6 1//6 1//6 1//6 0 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 1//6 0], b = Float64[1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10; 1//5 0 0 3//10 0 0 1//5 0 3//10 0]), - "Fekete 6,4" => (rk_coeffs = [0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 
-0.18132326703443313 -0.0017300417984673078; 0.3552975516919 0.4295138541066736 -6.461498003318411e-14 -1.1871059690804486e-13 0.0 2.9254376698872875e-14 -0.18902907903375094; 0.0 0.33178669836 0.25530138316744333 -3.352873534367973e-14 0.0 0.2059808002676668 0.2504712436879622; 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522; 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.986456106503283e-14 1.1993626679930305; 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745; 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524], + "Fekete 6,4" => (rk_coefs = [0.6447024483081 0.23869944753332645 0.5474858792272213 0.3762853856474131 -6.304828384656085e-17 -0.1813232670344333 -0.0017300417984673633; 0.3552975516919 0.4295138541066736 -6.460461358323626e-14 -1.1868936325049587e-13 3.608184516786869e-18 2.9392365006883485e-14 -0.18902907903375094; -0.0 0.33178669836 0.25530138316744333 -3.3545605887402925e-14 -1.0929532856876731e-17 0.20598080026766677 0.2504712436879622; -0.0 -0.0 0.1972127376054 0.3518900216285391 7.036963218665071e-17 0.47926701162417157 -0.939747918037452; -0.0 -0.0 -0.0 0.2718245927242 0.5641843457422999 9.97599117309567e-14 1.1993626679930303; -0.0 -0.0 -0.0 -0.0 0.4358156542577 0.34165678726956566 -0.5310335716309745; -0.0 -0.0 -0.0 -0.0 -0.0 0.1544186678729 1.2117066988196523], a = [0.0 0.0 0.0 0.0 0.0 0.0; 0.3552975516919 0.0 0.0 0.0 0.0 0.0; 0.2704882223931 0.33178669836 0.0 0.0 0.0 0.0; 0.1223997401356 0.1501381660925 0.1972127376054 0.0 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.4358156542577 0.0], b = [0.1522491819555 0.1867521364225 0.1555370561501 0.1348455085546 0.2161974490441 0.1544186678729; 0.1210663237182 0.230884400455 0.0853424972752 0.3450614904457 0.0305351538213 0.1871101342844]), + + "KennedyCarpenterARK437" => (rk_coefs = Float64[1259//2000 5290646302898597//8373961392408000 
8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 -19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 
288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000; 247//1000 -989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 
295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 -103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 
4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000; 0 2694949928731//7487940209513 8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 
1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000; 0 0 -952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 
-3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 -2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000; 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 
-1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; 0 0 0 0 0 11565764226357//8513123442827 -25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; 0 0 0 0 0 0 247//2000 2441//2470], + rk_coefs_implicit = Float64[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 -295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 
-1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 -4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000; 0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 
-205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 -6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 
-5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820; 0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 -773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 
-1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164; 0 0 0 247//2000 538260754703221409274894839//225772174112649072819826640 53717436927136847537872396533404737469775216052354000//7927277355078818621920036006972820247636839819935349 -1772830339659539491048538392985299701647037422231103660763419873677962619701225032539957600128007666810800//15721979495129036484463918711394582943772656542660620350548585102102545643714934169988943899054128147359 23300798642481300915550244810330608079712179111290003575082920574543585477524931007510835130377963055801601626441651388811757948939347006283700//721721356793285693541681937242605189550706405472415761047580279465244653002939927725991664966944137182650520997040379092802938837804294847283 7898475573277855254317959379657908427305225639249224806096111567367259892230833277244398214475361481979103570897581069354516256084225361210253104267762350814757343118967653292590//10295136532244819865134216418581425896312845076778105967882093451812315373150919833368240666957561175670654409595846292949450822030580980616104277250108095432715677497120580742213; 0 0 0 0 247//2000 109149106916529224225613938235//31652606811075124885796735456 -1471402139169815526549951787477624798736224552941321200//56440086799582050350922834838054057420026494337598863 5412580004078613161687981458637008030233366457113868199883389551037522158772900//455213269145024092215621169612878312797315768577504391565569423807316058315843 
3820395190970937465086284368007694267392689261092114936550556797062450867925288189416331303649552143230//14273858093118406255740963176533941671573423886510381570104226394395061792966349825149357548274393367193; 0 0 0 0 0 247//2000 3702251939282354375344210899//4399061605898178118260737311 7335745460336671146051037364344432719375//4167718263865310401834285721625989602073 11934751738672605596266458500552561610251142174626625//266467933294598164559149808395021381193202692438810102; 0 0 0 0 0 0 247//2000 1753//2000 0], + implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0], + a = Float64[0 0 0 0 0 0 0; 247//1000 0 0 0 0 0 0; 247//4000 2694949928731//7487940209513 0 0 0 0 0; 464650059369//8764239774964 878889893998//2444806327765 -952945855348//12294611323341 0 0 0 0; 476636172619//8159180917465 -1271469283451//7793814740893 -859560642026//4356155882851 1723805262919//4571918432560 0 0 0; 6338158500785//11769362343261 -4970555480458//10924838743837 3326578051521//2647936831840 -880713585975//1841400956686 -1428733748635//8843423958496 0 0; 760814592956//3276306540349 760814592956//3276306540349 -47223648122716//6934462133451 71187472546993//9669769126921 -13330509492149//9695768672337 11565764226357//8513123442827 0], + b = Float64[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000; 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + a_implicit = Float64[0 0 0 0 0 0 0; 247//2000 247//2000 0 0 0 0 0; 624185399699//4186980696204 624185399699//4186980696204 247//2000 0 0 0 0; 1258591069120//10082082980243 1258591069120//10082082980243 -322722984531//8455138723562 247//2000 0 0 0; -436103496990//5971407786587 -436103496990//5971407786587 -2689175662187//11046760208243 4431412449334//12995360898505 247//2000 0 0; -2207373168298//14430576638973 -2207373168298//14430576638973 242511121179//3358618340039 
3145666661981//7780404714551 5882073923981//14490790706663 247//2000 0; 0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000], + b_implicit = Float64[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000; 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + ), + +# The 5th order KennedyCarpenterARK548 method seems to be missing the 8th row of a_implicit +# coefficients in the Kennedy & Carpenter (2019) paper, so this is not correct. +# "KennedyCarpenterARK548" => (rk_coefs=Rational{BigInt}[], +# rk_coefs_implicit = Float64[], +# implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0, 0], +# a = Float64[0 0 0 0 0 0 0 0; 4//9 0 0 0 0 0 0 0; 1//9 1183333538310//1827251437969 0 0 0 0 0 0; 895379019517//9750411845327 477606656805//13473228687314 -112564739183//9373365219272 0 0 0 0 0; -4458043123994//13015289567637 -2500665203865//9342069639922 983347055801//8893519644487 2185051477207//2551468980502 0 0 0 0; -167316361917//17121522574472 1605541814917//7619724128744 991021770328//13052792161721 2342280609577//11279663441611 3012424348531//12792462456678 0 0 0; 6680998715867//14310383562358 5029118570809//3897454228471 2415062538259//6382199904604 -3924368632305//6964820224454 -4331110370267//15021686902756 -3944303808049//11994238218192 0 0; 2193717860234//3570523412979 2193717860234//3570523412979 5952760925747//18750164281544 -4412967128996//6196664114337 4151782504231//36106512998704 572599549169//6265429158920 -457874356192//11306498036315 0], +# b = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325
5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926], +# a_implicit = Float64[0 0 0 0 0 0 0 0; 2//9 2//9 0 0 0 0 0 0; 2366667076620//8822750406821 2366667076620//8822750406821 2//9 0 0 0 0 0; -257962897183//4451812247028 -257962897183//4451812247028 128530224461//14379561246022 2//9 0 0 0 0; -486229321650//11227943450093 -486229321650//11227943450093 -225633144460//6633558740617 1741320951451//6824444397158 2//9 0 0 0; 621307788657//4714163060173 621307788657//4714163060173 -125196015625//3866852212004 940440206406//7593089888465 961109811699//6734810228204 2//9 0 0; 2036305566805//6583108094622 2036305566805//6583108094622 -3039402635899//4450598839912 -1829510709469//31102090912115 -286320471013//6931253422520 8651533662697//9642993110008 2//9 0; 0 0 0 0 0 0 0 2//9], +# b_implicit = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926], +# ), + + "KennedyCarpenterARK324" => (rk_coefs = Float64[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 
-674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800; 1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; 0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 -99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; 0 0 10755448449292//10357097424841 
-2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; 0 0 0 1767732205903//4055673282236 2223734833661311464443869//2412892370833855116699825], + rk_coefs_implicit = Float64[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 -1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200; 0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 -13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425; 0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 
91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225; 0 0 0 1767732205903//4055673282236 2287941076333//4055673282236 0], + implicit_coefficient_is_zero = Bool[1, 0, 0, 0], + a = Float64[0 0 0 0; 1767732205903//2027836641118 0 0 0; 5535828885825//10492691773637 788022342437//10882634858940 0 0; 6485989280629//16251701735622 -4246266847089//9704473918619 10755448449292//10357097424841 0], + b = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], + a_implicit = Float64[0 0 0 0; 1767732205903//4055673282236 1767732205903//4055673282236 0 0; 2746238789719//10658868560708 -640167445237//6845629431997 1767732205903//4055673282236 0; 1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236], + b_implicit = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100] + ), ) -a, b = convert_rk_coeffs_to_butcher_tableau(methods["RKF45"].rk_coeffs) -methods["RKF45 attempt 2"] = (rk_coeffs = methods["RKF45"].rk_coeffs, +a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true) +methods["RKF45 attempt 2"] = (rk_coefs = methods["RKF45"].rk_coefs, a = a, b = b) for (k,v) ∈ methods + imex = any(:rk_coefs_implicit ∈ keys(v)) + println("\n", k) - result, error = rk_advance(v.rk_coeffs, y0, dt, nsteps) - result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps) + + if imex + 
this_result, this_error = rk_advance(v.rk_coefs, y0, dt, nsteps, v.rk_coefs_implicit, v.implicit_coefficient_is_zero) + result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps, v.a_implicit, v.b_implicit) + else + this_result, this_error = rk_advance(v.rk_coefs, y0, dt, nsteps) + result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps) + end #for i ∈ 1:multiplier:nsteps+1 - # println("$i t=", t[i], " analytic=", analytic[i], " result=", result[i], " result_butcher=", result_butcher[i]) + # println("$i t=", t[i], " analytic=", analytic[i], " result=", this_result[i], " result_butcher=", result_butcher[i]) #end println("t=", t[end]) - println("analytic = ", analytic[end]) - println("result = ", result[end]) + if imex + println("analytic = ", analytic_implicit[end]) + else + println("analytic = ", analytic[end]) + end + println("result = ", this_result[end]) println("result_butcher = ", result_butcher[end]) - println("error = ", error[end]) + println("error = ", this_error[end]) println("error_butcher = ", error_butcher[end]) end @@ -230,7 +387,7 @@ elseif n_rk_stages == 1 else error("Unsupported number of RK stages, n_rk_stages=$n_rk_stages") end -result = rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps) +ssprk3_result = rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps) println("t=", t[end]) println("analytic = ", analytic[end]) -println("result = ", result[end]) +println("result = ", ssprk3_result[end])