diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl
index 725a16e86..c64e74a5d 100644
--- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl
+++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl
@@ -2462,9 +2462,6 @@ function plot_1d(xcoord, data; ax=nothing, xlabel=nothing, ylabel=nothing, title
     if title !== nothing
         ax.title = title
     end
-    if yscale !== nothing
-        ax.yscale = yscale
-    end
 
     if transform !== identity
         # Use transform to allow user to do something like data = abs.(data)
@@ -2476,6 +2473,10 @@ function plot_1d(xcoord, data; ax=nothing, xlabel=nothing, ylabel=nothing, title
 
     l = lines!(ax, xcoord, data; kwargs...)
 
+    if yscale !== nothing
+        ax.yscale = yscale
+    end
+
     if fig === nothing
         return l
     else
@@ -4062,8 +4063,9 @@ end
 
 function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing,
                                  neutral=false, is=1, iz=nothing, fig=nothing, ax=nothing,
-                                 frame_index=nothing, outfile=nothing, transform=identity,
-                                 axis_args=Dict{Symbol,Any}(), kwargs...)
+                                 frame_index=nothing, outfile=nothing, yscale=nothing,
+                                 transform=identity, axis_args=Dict{Symbol,Any}(),
+                                 kwargs...)
     if input === nothing
         if neutral
             input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"])
@@ -4141,20 +4143,27 @@ function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing,
 
         this_f_unnorm = get_this_f_unnorm(it)
 
-        this_fmin, this_fmax = NaNMath.extrema(transform(this_f_unnorm))
+        this_fmin, this_fmax = NaNMath.extrema(transform.(this_f_unnorm))
         fmin = min(fmin, this_fmin)
         fmax = max(fmax, this_fmax)
     end
     yheight = fmax - fmin
     xwidth = dzdtmax - dzdtmin
-    limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth,
-            fmin - 0.01*yheight, fmax + 0.01*yheight)
+    if yscale ∈ (log, log10)
+        # Need to calculate y offsets differently to non-logarithmic y-axis case, to
+        # ensure ymin is not negative.
+        limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth,
+                fmin * (fmin/fmax)^0.01, fmax * (fmax/fmin)^0.01)
+    else
+        limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth,
+                fmin - 0.01*yheight, fmax + 0.01*yheight)
+    end
 
     dzdt = @lift vpagrid_to_dzdt(run_info.vpa.grid, vth[$frame_index], upar[$frame_index],
                                  run_info.evolve_ppar, run_info.evolve_upar)
     f_unnorm = @lift transform.(get_this_f_unnorm($frame_index))
 
-    l = plot_1d(dzdt, f_unnorm; ax=ax, label=run_info.run_name, kwargs...)
+    l = plot_1d(dzdt, f_unnorm; ax=ax, label=run_info.run_name, yscale=yscale, kwargs...)
 
     if outfile !== nothing
         if fig === nothing
@@ -4403,18 +4412,80 @@ function plot_ion_pdf_2D_at_wall(run_info; plot_prefix)
                          && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar)
                          for ri ∈ run_info)
 
-    for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+8, "wall-"),
-                               (z_upper, z_upper-8:z_upper, "wall+"))
+    nt = minimum(ri.nt for ri ∈ run_info)
+
+    for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"),
+                               (z_upper, z_upper-4:z_upper, "wall+"))
         f_input = copy(input_dict_dfns["f"])
         f_input["iz0"] = z
 
         if input.plot
-            plot_vs_vpa(run_info, "f"; is=1, input=f_input,
-                        outfile=plot_prefix * "pdf_$(label)_vs_vpa.pdf")
+            fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f")
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    plot_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input,
+                                label="$(run_label)iz=$iz", ax=ax)
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "pdf_$(label)_vs_vpa.pdf"
+            save(outfile, fig)
+
+            fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f")
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    plot_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input,
+                                label="$(run_label)iz=$iz", ax=ax, yscale=log10,
+                                transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "logpdf_$(label)_vs_vpa.pdf"
+            save(outfile, fig)
 
             if moment_kinetic
-                plot_f_unnorm_vs_vpa(run_info; input=f_input, is=1,
-                                     outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa.pdf")
+                fig, ax = get_1d_ax(; xlabel="vpa_unnorm", ylabel="f_unnorm")
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        plot_f_unnorm_vs_vpa(ri; input=f_input, is=1, iz=iz,
+                                             label="$(run_label)iz=$iz", ax=ax)
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa.pdf"
+                save(outfile, fig)
+
+                fig, ax = get_1d_ax(; xlabel="vpa_unnorm", ylabel="f_unnorm")
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        plot_f_unnorm_vs_vpa(ri; input=f_input, is=1, iz=iz,
+                                             label="$(run_label)iz=$iz", ax=ax, yscale=log10,
+                                             transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa.pdf"
+                save(outfile, fig)
             end
 
             plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=true, input=f_input, is=1,
@@ -4442,12 +4513,80 @@ function plot_ion_pdf_2D_at_wall(run_info; plot_prefix)
         end
 
         if input.animate
-            animate_vs_vpa(run_info, "f"; is=1, input=f_input,
-                           outfile=plot_prefix * "pdf_$(label)_vs_vpa." * input.animation_ext)
+            fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f")
+            frame_index = Observable(1)
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    animate_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input,
+                                   label="$(run_label)iz=$iz", ax=ax,
+                                   frame_index=frame_index)
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "pdf_$(label)_vs_vpa." * input.animation_ext
+            save_animation(fig, frame_index, nt, outfile)
+
+            fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f", yscale=log10)
+            frame_index = Observable(1)
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    animate_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input,
+                                   label="$(run_label)iz=$iz", ax=ax,
+                                   frame_index=frame_index,
+                                   transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "logpdf_$(label)_vs_vpa." * input.animation_ext
+            save_animation(fig, frame_index, nt, outfile)
 
             if moment_kinetic
-                animate_f_unnorm_vs_vpa(run_info; input=f_input, is=1,
-                                        outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa." * input.animation_ext)
+                fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f")
+                frame_index = Observable(1)
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        animate_f_unnorm_vs_vpa(ri; is=1, iz=iz, input=f_input,
+                                                label="$(run_label)iz=$iz", ax=ax,
+                                                frame_index=frame_index)
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa." * input.animation_ext
+                save_animation(fig, frame_index, nt, outfile)
+
+                fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f")
+                frame_index = Observable(1)
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        animate_f_unnorm_vs_vpa(ri; is=1, iz=iz, input=f_input,
+                                                label="$(run_label)iz=$iz", ax=ax,
+                                                frame_index=frame_index, yscale=log10,
+                                                transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa." * input.animation_ext
+                save_animation(fig, frame_index, nt, outfile)
             end
 
             animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=true, input=f_input, is=1,
@@ -4528,19 +4667,82 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix)
     moment_kinetic = any(ri !== nothing
                          && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar)
                          for ri ∈ run_info)
+    nt = minimum(ri.nt for ri ∈ run_info)
 
-    for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+8, "wall-"),
-                               (z_upper, z_upper-8:z_upper, "wall+"))
+    for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"),
+                               (z_upper, z_upper-4:z_upper, "wall+"))
         f_neutral_input = copy(input_dict_dfns["f_neutral"])
         f_neutral_input["iz0"] = z
 
         if input.plot
-            plot_vs_vz(run_info, "f_neutral"; is=1, input=f_neutral_input,
-                       outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz.pdf")
+            fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral")
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    plot_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input,
+                               label="$(run_label)iz=$iz", ax=ax)
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz.pdf"
+            save(outfile, fig)
+
+            fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral")
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    plot_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input,
+                               label="$(run_label)iz=$iz", ax=ax, yscale=log10,
+                               transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vpa.pdf"
+            save(outfile, fig)
 
             if moment_kinetic
-                plot_f_unnorm_vs_vpa(run_info; input=f_neutral_input, neutral=true, is=1,
-                                     outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vpa.pdf")
+                fig, ax = get_1d_ax(; xlabel="vz_unnorm", ylabel="f_neutral_unnorm")
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        plot_f_unnorm_vs_vpa(ri; neutral=true, input=f_neutral_input,
+                                             is=1, iz=iz, label="$(run_label)iz=$iz",
+                                             ax=ax)
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vpa.pdf"
+                save(outfile, fig)
+
+                fig, ax = get_1d_ax(; xlabel="vz_unnorm", ylabel="f_neutral_unnorm")
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        plot_f_unnorm_vs_vpa(ri; neutral=true, input=f_neutral_input,
+                                             is=1, iz=iz, label="$(run_label)iz=$iz",
+                                             ax=ax, yscale=log10,
+                                             transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vpa.pdf"
+                save(outfile, fig)
             end
 
             if !is_1V
@@ -4583,12 +4785,81 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix)
         end
 
         if input.animate
-            animate_vs_vz(run_info, "f_neutral"; is=1, input=f_neutral_input,
-                          outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz." * input.animation_ext)
+            fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral")
+            frame_index = Observable(1)
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    animate_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input,
+                                  label="$(run_label)iz=$iz", ax=ax,
+                                  frame_index=frame_index)
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz." * input.animation_ext
+            save_animation(fig, frame_index, nt, outfile)
+
+            fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral", yscale=log10)
+            frame_index = Observable(1)
+            for iz ∈ z_range
+                for ri ∈ run_info
+                    if length(run_info) > 1
+                        run_label = ri.run_name * " "
+                    else
+                        run_label = ""
+                    end
+                    animate_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input,
+                                  label="$(run_label)iz=$iz", ax=ax,
+                                  frame_index=frame_index,
+                                  transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                end
+            end
+            put_legend_right(fig, ax)
+            outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vz." * input.animation_ext
+            save_animation(fig, frame_index, nt, outfile)
 
             if moment_kinetic
-                animate_f_unnorm_vs_vpa(run_info; input=f_neutral_input, neutral=true, is=1,
-                                        outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext)
+                fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral")
+                frame_index = Observable(1)
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        animate_f_unnorm_vs_vpa(ri; neutral=true, is=1, iz=iz,
+                                                input=f_neutral_input,
+                                                label="$(run_label)iz=$iz", ax=ax,
+                                                frame_index=frame_index)
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext
+                save_animation(fig, frame_index, nt, outfile)
+
+                fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral")
+                frame_index = Observable(1)
+                for iz ∈ z_range
+                    for ri ∈ run_info
+                        if length(run_info) > 1
+                            run_label = ri.run_name * " "
+                        else
+                            run_label = ""
+                        end
+                        animate_f_unnorm_vs_vpa(ri; neutral=true, is=1, iz=iz,
+                                                input=f_neutral_input, label="$(run_label)iz=$iz",
+                                                ax=ax, frame_index=frame_index, yscale=log10,
+                                                transform=(x)->positive_or_nan(x; epsilon=1.e-20))
+                    end
+                end
+                put_legend_right(fig, ax)
+                outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext
+                save_animation(fig, frame_index, nt, outfile)
             end
 
             if !is_1V
@@ -4755,35 +5026,35 @@ function constraints_plots(run_info; plot_prefix=plot_prefix)
             end
 
             # Electrons
-            if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info)
-
-                fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient")
-                for ri ∈ run_info
-                    if length(run_info) > 1
-                        prefix = ri.run_name * ", "
-                    else
-                        prefix = ""
-                    end
-
-                    varname = "electron_constraints_A_coefficient"
-                    label = prefix * "(A-1)"
-                    data = get_variable(ri, varname; it=it0, ir=ir0)
-                    data .-= 1.0
-                    plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input)
-
-                    varname = "electron_constraints_B_coefficient"
-                    label = prefix * "B"
-                    plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0,
-                              input=input)
-
-                    varname = "electron_constraints_C_coefficient"
-                    label = prefix * "C"
-                    plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0,
-                              input=input)
-                end
-                put_legend_right(fig, ax)
-                save(plot_prefix * "electron_constraints.pdf", fig)
-            end
+            #if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info)
+
+            #    fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient")
+            #    for ri ∈ run_info
+            #        if length(run_info) > 1
+            #            prefix = ri.run_name * ", "
+            #        else
+            #            prefix = ""
+            #        end
+
+            #        varname = "electron_constraints_A_coefficient"
+            #        label = prefix * "(A-1)"
+            #        data = get_variable(ri, varname; it=it0, ir=ir0)
+            #        data .-= 1.0
+            #        plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input)
+
+            #        varname = "electron_constraints_B_coefficient"
+            #        label = prefix * "B"
+            #        plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0,
+            #                  input=input)
+
+            #        varname = "electron_constraints_C_coefficient"
+            #        label = prefix * "C"
+            #        plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0,
+            #                  input=input)
+            #    end
+            #    put_legend_right(fig, ax)
+            #    save(plot_prefix * "electron_constraints.pdf", fig)
+            #end
         end
 
         if input.animate
@@ -4917,53 +5188,53 @@ function constraints_plots(run_info; plot_prefix=plot_prefix)
             end
 
             # Electrons
-            if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info)
-
-                frame_index = Observable(1)
-                fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient")
-
-                # Calculate plot limits manually so we can exclude the first time point, which
-                # often has a large value for (A-1) due to the way initialisation is done,
-                # which can make the subsequent values hard to see.
-                ymin = Inf
-                ymax = -Inf
-                for ri ∈ run_info
-                    if length(run_info) > 1
-                        prefix = ri.run_name * ", "
-                    else
-                        prefix = ""
-                    end
-
-                    varname = "electron_constraints_A_coefficient"
-                    label = prefix * "(A-1)"
-                    data = get_variable(ri, varname; ir=ir0)
-                    data .-= 1.0
-                    ymin = min(ymin, minimum(data[:,2:end]))
-                    ymax = max(ymax, maximum(data[:,2:end]))
-                    animate_vs_z(ri, varname; label=label, data=data,
-                                 frame_index=frame_index, ax=ax, input=input)
-
-                    varname = "electron_constraints_B_coefficient"
-                    label = prefix * "B"
-                    data = get_variable(ri, varname; ir=ir0)
-                    ymin = min(ymin, minimum(data[:,2:end]))
-                    ymax = max(ymax, maximum(data[:,2:end]))
-                    animate_vs_z(ri, varname; label=label, data=data,
-                                 frame_index=frame_index, ax=ax, ir=ir0, input=input)
-
-                    varname = "electron_constraints_C_coefficient"
-                    label = prefix * "C"
-                    data = get_variable(ri, varname; ir=ir0)
-                    ymin = min(ymin, minimum(data[:,2:end]))
-                    ymax = max(ymax, maximum(data[:,2:end]))
-                    animate_vs_z(ri, varname; label=label, data=data,
-                                 frame_index=frame_index, ax=ax, ir=ir0, input=input)
-                end
-                put_legend_right(fig, ax)
-                ylims!(ax, ymin, ymax)
-                save_animation(fig, frame_index, nt,
-                               plot_prefix * "electron_constraints." * input.animation_ext)
-            end
+            #if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info)
+
+            #    frame_index = Observable(1)
+            #    fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient")
+
+            #    # Calculate plot limits manually so we can exclude the first time point, which
+            #    # often has a large value for (A-1) due to the way initialisation is done,
+            #    # which can make the subsequent values hard to see.
+            #    ymin = Inf
+            #    ymax = -Inf
+            #    for ri ∈ run_info
+            #        if length(run_info) > 1
+            #            prefix = ri.run_name * ", "
+            #        else
+            #            prefix = ""
+            #        end
+
+            #        varname = "electron_constraints_A_coefficient"
+            #        label = prefix * "(A-1)"
+            #        data = get_variable(ri, varname; ir=ir0)
+            #        data .-= 1.0
+            #        ymin = min(ymin, minimum(data[:,2:end]))
+            #        ymax = max(ymax, maximum(data[:,2:end]))
+            #        animate_vs_z(ri, varname; label=label, data=data,
+            #                     frame_index=frame_index, ax=ax, input=input)
+
+            #        varname = "electron_constraints_B_coefficient"
+            #        label = prefix * "B"
+            #        data = get_variable(ri, varname; ir=ir0)
+            #        ymin = min(ymin, minimum(data[:,2:end]))
+            #        ymax = max(ymax, maximum(data[:,2:end]))
+            #        animate_vs_z(ri, varname; label=label, data=data,
+            #                     frame_index=frame_index, ax=ax, ir=ir0, input=input)
+
+            #        varname = "electron_constraints_C_coefficient"
+            #        label = prefix * "C"
+            #        data = get_variable(ri, varname; ir=ir0)
+            #        ymin = min(ymin, minimum(data[:,2:end]))
+            #        ymax = max(ymax, maximum(data[:,2:end]))
+            #        animate_vs_z(ri, varname; label=label, data=data,
+            #                     frame_index=frame_index, ax=ax, ir=ir0, input=input)
+            #    end
+            #    put_legend_right(fig, ax)
+            #    ylims!(ax, ymin, ymax)
+            #    save_animation(fig, frame_index, nt,
+            #                   plot_prefix * "electron_constraints." * input.animation_ext)
+            #end
         end
     catch e
         println("Error in constraints_plots(). Error was ", e)
@@ -6893,9 +7164,9 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing)
 
         input = Dict_to_NamedTuple(input_dict["timestep_diagnostics"])
 
-         steps_fig = nothing
-         dt_fig = nothing
-         CFL_fig = nothing
+        steps_fig = nothing
+        dt_fig = nothing
+        CFL_fig = nothing
 
         if input.plot
             # Plot numbers of steps and numbers of failures
@@ -6916,13 +7187,19 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing)
                     prefix = ri.run_name * " "
                 end
 
-                plot_1d(ri.time, get_variable(ri, "steps_per_output"; it=it);
+                if it !== nothing
+                    time = ri.time[it]
+                else
+                    time = ri.time
+                end
+
+                plot_1d(time, get_variable(ri, "steps_per_output"; it=it);
                         label=prefix * "steps", ax=ax)
                 # Fudge to create an invisible line on ax_failures that cycles the line colors
                 # and adds a label for "steps_per_output" to the plot because we create the
                 # legend from ax_failures.
-                plot_1d([ri.time[1]], [0]; label=prefix * "steps", ax=ax_failures)
-                plot_1d(ri.time, get_variable(ri, "failures_per_output"; it=it);
+                plot_1d([time[1]], [0]; label=prefix * "steps", ax=ax_failures)
+                plot_1d(time, get_variable(ri, "failures_per_output"; it=it);
                         label=prefix * "failures", ax=ax_failures)
 
                 failure_caused_by_per_output = get_variable(ri,
@@ -6931,55 +7208,63 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing)
                 counter = 0
                 # Ion pdf failure counter
                 counter += 1
-                plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                plot_1d(time, @view failure_caused_by_per_output[counter,:];
                         label=prefix * "failures caused by f_ion", ax=ax_failures)
                 if ri.evolve_density
                     # Ion density failure counter
                     counter += 1
-                    plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                    plot_1d(time, @view failure_caused_by_per_output[counter,:];
                             linestyle=:dash, label=prefix * "failures caused by n_ion",
                             ax=ax_failures)
                 end
                 if ri.evolve_upar
                     # Ion flow failure counter
                     counter += 1
-                    plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                    plot_1d(time, @view failure_caused_by_per_output[counter,:];
                             linestyle=:dash, label=prefix * "failures caused by u_ion",
                             ax=ax_failures)
                 end
                 if ri.evolve_ppar
                     # Ion flow failure counter
                     counter += 1
-                    plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                    plot_1d(time, @view failure_caused_by_per_output[counter,:];
                             linestyle=:dash, label=prefix * "failures caused by p_ion",
                             ax=ax_failures)
                 end
                 if ri.n_neutral_species > 0
                     # Neutral pdf failure counter
                     counter += 1
-                    plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                    plot_1d(time, @view failure_caused_by_per_output[counter,:];
                             label=prefix * "failures caused by f_neutral", ax=ax_failures)
                     if ri.evolve_density
                         # Neutral density failure counter
                         counter += 1
-                        plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                        plot_1d(time, @view failure_caused_by_per_output[counter,:];
                                 linestyle=:dash,
                                 label=prefix * "failures caused by n_neutral", ax=ax_failures)
                     end
                     if ri.evolve_upar
                         # Neutral flow failure counter
                         counter += 1
-                        plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                        plot_1d(time, @view failure_caused_by_per_output[counter,:];
                                 linestyle=:dash,
                                 label=prefix * "failures caused by u_neutral", ax=ax_failures)
                     end
                     if ri.evolve_ppar
                         # Neutral flow failure counter
                         counter += 1
-                        plot_1d(ri.time, @view failure_caused_by_per_output[counter,:];
+                        plot_1d(time, @view failure_caused_by_per_output[counter,:];
                                 linestyle=:dash,
                                 label=prefix * "failures caused by p_neutral", ax=ax_failures)
                     end
+                    if occursin("ARK", ri.t_input["type"])
+                        # Nonlinear iteration failed to converge in implicit part of
+                        # timestep
+                        counter += 1
+                        plot_1d(time, @view failure_caused_by_per_output[counter,:];
+                                linestyle=:dot,
+                                label=prefix * "nonlinear iteration convergence failure", ax=ax_failures)
+                    end
                 end
 
                 if counter > size(failure_caused_by_per_output, 1)
@@ -7016,20 +7301,50 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing)
                 else
                     prefix = ri.run_name * " "
                 end
-                CFL_vars = ["minimum_CFL_ion_z", "minimum_CFL_ion_vpa"]
+                if it !== nothing
+                    time = ri.time[it]
+                else
+                    time = ri.time
+                end
+
+                CFL_vars = String[]
+                implicit_CFL_vars = String[]
+
+                push!(CFL_vars, "minimum_CFL_ion_z")
+                if occursin("ARK", ri.t_input["type"]) && ri.t_input["implicit_ion_advance"]
+                    push!(implicit_CFL_vars, "minimum_CFL_ion_z")
+                end
+                push!(CFL_vars, "minimum_CFL_ion_vpa")
+                if occursin("ARK", ri.t_input["type"]) && (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"])
+                    push!(implicit_CFL_vars, "minimum_CFL_ion_vpa")
+                end
                 if ri.n_neutral_species > 0
                     push!(CFL_vars, "minimum_CFL_neutral_z", "minimum_CFL_neutral_vz")
                 end
                 for varname ∈ CFL_vars
                     var = get_variable(ri, varname)
-                    maxval = min(maxval, maximum(var))
-                    plot_1d(ri.time, var; ax=ax, label=prefix*varname)
+                    maxval = NaNMath.min(maxval, NaNMath.maximum(var))
+                    if occursin("neutral", varname)
+                        if varname ∈ implicit_CFL_vars
+                            linestyle = :dashdot
+                        else
+                            linestyle = :dash
+                        end
+                    else
+                        if varname ∈ implicit_CFL_vars
+                            linestyle = :dot
+                        else
+                            linestyle = nothing
+                        end
+                    end
+                    plot_1d(time, var; ax=ax, label=prefix*varname, linestyle=linestyle)
                 end
             end
-            ylims!(ax, 0.0, 4.0 * maxval)
+            ylims!(ax, 0.0, 10.0 * maxval)
             put_legend_right(CFL_fig, ax)
 
-            limits_fig, ax = get_1d_ax(; xlabel="time", ylabel="number of limits per factor per output")
+            limits_fig, ax = get_1d_ax(; xlabel="time", ylabel="number of limits per factor per output",
+                                       size=(600, 500))
 
             for ri ∈ run_info
                 if length(run_info) == 1
@@ -7037,57 +7352,113 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing)
                 else
                     prefix = ri.run_name * " "
                 end
+                if it !== nothing
+                    time = ri.time[it]
+                else
+                    time = ri.time
+                end
 
                 limit_caused_by_per_output = get_variable(ri,
                                                           "limit_caused_by_per_output";
                                                           it=it)
                 counter = 0
 
-                # Accuracy limit counter
-                counter += 1
-                plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
-                        label=prefix * "RK accuracy", ax=ax)
-
                 # Maximum timestep increase limit counter
                 counter += 1
-                plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
+                plot_1d(time, @view limit_caused_by_per_output[counter,:];
                         label=prefix * "max timestep increase", ax=ax)
 
                 # Slower maximum timestep increase near last failure limit counter
                 counter += 1
-                plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
+                plot_1d(time, @view limit_caused_by_per_output[counter,:];
                         label=prefix * "max timestep increase near last fail", ax=ax)
 
                 # Minimum timestep limit counter
                 counter += 1
-                plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
+                plot_1d(time, @view limit_caused_by_per_output[counter,:];
                         label=prefix * "min timestep", ax=ax)
 
                 # Maximum timestep limit counter
                 counter += 1
-                plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
+                plot_1d(time, @view limit_caused_by_per_output[counter,:];
                         label=prefix * "max timestep", ax=ax)
 
-                # Ion z advection
+                # High nonlinear iterations count
                 counter += 1
-                plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
-                        label=prefix * "ion z advect", ax=ax)
+                plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                        label=prefix * "high nl iterations", ax=ax)
 
-                # Ion vpa advection
+                # Accuracy limit counters
                 counter += 1
-                plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
-                        label=prefix * "ion vpa advect", ax=ax)
+                plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                        label=prefix * "ion pdf RK accuracy", ax=ax, linestyle=:dash)
+                if ri.evolve_density
+                    counter += 1
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "ion density RK accuracy", ax=ax,
+                            linestyle=:dash)
+                end
+                if ri.evolve_upar
+                    counter += 1
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "ion upar RK accuracy", ax=ax,
+                            linestyle=:dash)
+                end
+                if ri.evolve_ppar
+                    counter += 1
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "ion ppar RK accuracy", ax=ax,
+                            linestyle=:dash)
+                end
+                if ri.n_neutral_species > 0
+                    counter += 1
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "neutral pdf RK accuracy", ax=ax,
+                            linestyle=:dash)
+                    if ri.evolve_density
+                        counter += 1
+                        plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                                label=prefix * "neutral density RK accuracy", ax=ax,
+                                linestyle=:dash)
+                    end
+                    if ri.evolve_upar
+                        counter += 1
+                        plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                                label=prefix * "neutral uz RK accuracy", ax=ax,
+                                linestyle=:dash)
+                    end
+                    if ri.evolve_ppar
+                        counter += 1
+                        plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                                label=prefix * "neutral pz RK accuracy", ax=ax,
+                                linestyle=:dash)
+                    end
+                end
+
+                if !(occursin("ARK", ri.t_input["type"]) && ri.t_input["implicit_ion_advance"])
+                    # Ion z advection
+                    counter += 1
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "ion z advect", ax=ax, linestyle=:dot)
+                end
+
+                if !(occursin("ARK", ri.t_input["type"]) && (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]))
+                    # Ion vpa advection
+                    counter += 1
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "ion vpa advect", ax=ax, linestyle=:dot)
+                end
 
                 if ri.n_neutral_species > 0
                     # Ion z advection
                     counter += 1
-                    plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
-                            label=prefix * "neutral z advect", ax=ax)
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "neutral z advect", ax=ax, linestyle=:dot)
 
                     # Ion vpa advection
                     counter += 1
-                    plot_1d(ri.time, @view limit_caused_by_per_output[counter,:];
-                            label=prefix * "neutral vz advect", ax=ax)
+                    plot_1d(time, @view limit_caused_by_per_output[counter,:];
+                            label=prefix * "neutral vz advect", ax=ax, linestyle=:dot)
                 end
 
                 if counter > size(limit_caused_by_per_output, 1)
@@ -7103,6 +7474,41 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing)
 
             put_legend_right(limits_fig, ax)
 
+            # Plot nonlinear solver diagnostics (if any)
+            nl_solvers_fig, ax = get_1d_ax(; xlabel="time", ylabel="iterations per solve/nonlinear-iteration")
+            has_nl_solver = false
+
+            for ri ∈ run_info
+                if length(run_info) == 1
+                    prefix = ""
+                else
+                    prefix = ri.run_name * " "
+                end
+                if it !== nothing
+                    time = ri.time[it]
+                else
+                    time = ri.time
+                end
+
+                nl_nonlinear_iterations_names = Tuple(v for v ∈ ri.variable_names
+                                                      if occursin("_nonlinear_iterations", v))
+                if nl_nonlinear_iterations_names != ()
+                    has_nl_solver = true
+                    nl_prefixes = (split(v, "_nonlinear_iterations")[1]
+                                   for v ∈ nl_nonlinear_iterations_names)
+                    for p ∈ nl_prefixes
+                        nonlinear_iterations = get_variable(ri, "$(p)_nonlinear_iterations_per_solve")
+                        linear_iterations = get_variable(ri, "$(p)_linear_iterations_per_nonlinear_iteration")
+                        plot_1d(time, nonlinear_iterations, label=prefix * " " * p * " NL per solve", ax=ax)
+                        plot_1d(time, linear_iterations, label=prefix * " " * p * " L per NL", ax=ax)
+                    end
+                end
+            end
+
+            if has_nl_solver
+                put_legend_right(nl_solvers_fig, ax)
+            end
+
 
             if plot_prefix !== nothing
                 outfile = plot_prefix * "timestep_diagnostics.pdf"
@@ -7113,11 +7519,19 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing)
 
                 outfile = plot_prefix * "timestep_limits.pdf"
                 save(outfile, limits_fig)
+
+                if has_nl_solver
+                    outfile = plot_prefix * "nonlinear_solver_iterations.pdf"
+                    save(outfile, nl_solvers_fig)
+                end
             else
                 display(steps_fig)
                 display(dt_fig)
                 display(CFL_fig)
                 display(limits_fig)
+                if has_nl_solver
+                    display(nl_solvers_fig)
+                end
             end
         end
 
diff --git a/moment_kinetics/Project.toml b/moment_kinetics/Project.toml
index 45d1af96d..8de860c52 100644
--- a/moment_kinetics/Project.toml
+++ b/moment_kinetics/Project.toml
@@ -17,6 +17,7 @@ LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882"
 LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
+MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9"
 MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
 Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
diff --git a/moment_kinetics/src/calculus.jl b/moment_kinetics/src/calculus.jl
index fbff04a2f..d2c16a478 100644
--- a/moment_kinetics/src/calculus.jl
+++ b/moment_kinetics/src/calculus.jl
@@ -362,7 +362,10 @@ function assign_endpoint!(df1d::AbstractArray{mk_float,Ndims},
     # test against coord name -- make sure to use exact string delimiters e.g. "x" not 'x'
     # test against Ndims (autodetermined) to choose which array slices to use in assigning endpoints
     #println("DEBUG MESSAGE: coord.name: ",coord.name," Ndims: ",Ndims," key: ",key)
-    if coord.name == "z" && Ndims==2
+    if coord.name == "z" && Ndims==1
+        df1d[j] = receive_buffer[]
+        #println("ASSIGNING DATA")
+    elseif coord.name == "z" && Ndims==2
         df1d[j,:] .= receive_buffer[:]
         #println("ASSIGNING DATA")
     elseif coord.name == "z" && Ndims==3
@@ -374,6 +377,9 @@ function assign_endpoint!(df1d::AbstractArray{mk_float,Ndims},
     elseif coord.name == "z" && Ndims==6
         df1d[:,:,:,j,:,:] .= receive_buffer[:,:,:,:,:]
         #println("ASSIGNING DATA")
+    elseif coord.name == "r" && Ndims==1
+        df1d[j] = receive_buffer[]
+        #println("ASSIGNING DATA")
     elseif coord.name == "r" && Ndims==2
         df1d[:,j] .= receive_buffer[:]
         #println("ASSIGNING DATA")
diff --git a/moment_kinetics/src/charge_exchange.jl b/moment_kinetics/src/charge_exchange.jl
index 66c1bb7fa..e70782c8c 100644
--- a/moment_kinetics/src/charge_exchange.jl
+++ b/moment_kinetics/src/charge_exchange.jl
@@ -9,12 +9,12 @@ using ..looping
 using ..interpolation: interpolate_to_grid_vpa!
 
 """
-update the evolved pdf for each ion and electron species to account for
-charge exchange collisions between ions and neutrals
+update the evolved pdf for each ion species to account for charge exchange collisions
+between ions and neutrals
 """
-function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments,
-                                        composition, vpa, vz, charge_exchange_frequency,
-                                        vpa_spectral, vz_spectral, dt)
+function ion_charge_exchange_collisions_1V!(f_out, fvec_in, moments, composition, vpa, vz,
+                                            charge_exchange_frequency, vpa_spectral,
+                                            vz_spectral, dt)
     # This routine assumes a 1D model with:
     # nvz = nvpa and identical vz and vpa grids 
 
@@ -32,19 +32,6 @@ function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments,
                 moments.neutral.vth[:,:,is], moments, vpa, vz, charge_exchange_frequency,
                 vz_spectral, dt)
         end
-
-        begin_sn_r_z_region(no_synchronize=true)
-        @loop_sn isn begin
-            # apply CX collisions to all neutral species
-            # for each neutral species, obtain affect of charge exchange collisions
-            # with the corresponding ion species
-            @views charge_exchange_collisions_single_species!(
-                f_neutral_out[:,1,1,:,:,isn], fvec_in.pdf_neutral[:,1,1,:,:,isn],
-                fvec_in.pdf[:,1,:,:,isn], fvec_in.density[:,:,isn],
-                fvec_in.uz_neutral[:,:,isn], fvec_in.upar[:,:,isn],
-                moments.neutral.vth[:,:,isn], moments.ion.vth[:,:,isn], moments,
-                vz, vpa, charge_exchange_frequency, vpa_spectral, dt)
-        end
     else
         begin_s_r_z_region()
         @loop_s is begin
@@ -58,8 +45,35 @@ function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments,
                         - fvec_in.pdf[ivpa,1,iz,ir,is]*fvec_in.density_neutral[iz,ir,is])
             end
         end
+    end
+end
+
+"""
+update the evolved pdf for each neutral species to account for charge exchange collisions
+between ions and neutrals
+"""
+function neutral_charge_exchange_collisions_1V!(f_neutral_out, fvec_in, moments,
+                                                composition, vpa, vz,
+                                                charge_exchange_frequency, vpa_spectral,
+                                                vz_spectral, dt)
+    # This routine assumes a 1D model with:
+    # nvz = nvpa and identical vz and vpa grids
 
-        begin_sn_r_z_region(no_synchronize=true)
+    if moments.evolve_density
+        begin_sn_r_z_region()
+        @loop_sn isn begin
+            # apply CX collisions to all neutral species
+            # for each neutral species, obtain affect of charge exchange collisions
+            # with the corresponding ion species
+            @views charge_exchange_collisions_single_species!(
+                f_neutral_out[:,1,1,:,:,isn], fvec_in.pdf_neutral[:,1,1,:,:,isn],
+                fvec_in.pdf[:,1,:,:,isn], fvec_in.density[:,:,isn],
+                fvec_in.uz_neutral[:,:,isn], fvec_in.upar[:,:,isn],
+                moments.neutral.vth[:,:,isn], moments.ion.vth[:,:,isn], moments,
+                vz, vpa, charge_exchange_frequency, vpa_spectral, dt)
+        end
+    else
+        begin_sn_r_z_region()
         @loop_sn isn begin
             # apply CX collisions to all neutral species
             # for each neutral species, obtain affect of charge exchange collisions
@@ -135,21 +149,10 @@ function charge_exchange_collisions_single_species!(f_out, pdf_in, pdf_other,
     end
 end
 
-function charge_exchange_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, f_ion_vrvzvzeta_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r,
-                                     charge_exchange_frequency, dt)
+function ion_charge_exchange_collisions_3V!(f_out, f_neutral_gav_in, fvec_in, composition,
+                                            vz, vr, vzeta, vpa, vperp, z, r,
+                                            charge_exchange_frequency, dt)
     # This routine assumes a 3V model with:
-    @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out))
-    @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out))
-    @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out))
-    @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out))
-    @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out))
-    @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out))
-    @boundscheck vz.n == size(f_ion_vrvzvzeta_in,1) || throw(BoundsError(f_ion_vrvzvzeta_in))
-    @boundscheck vr.n == size(f_ion_vrvzvzeta_in,2) || throw(BoundsError(f_ion_vrvzvzeta_in))
-    @boundscheck vzeta.n == size(f_ion_vrvzvzeta_in,3) || throw(BoundsError(f_ion_vrvzvzeta_in))
-    @boundscheck z.n == size(f_ion_vrvzvzeta_in,4) || throw(BoundsError(f_ion_vrvzvzeta_in))
-    @boundscheck r.n == size(f_ion_vrvzvzeta_in,5) || throw(BoundsError(f_ion_vrvzvzeta_in))
-    @boundscheck composition.n_neutral_species == size(f_ion_vrvzvzeta_in,6) || throw(BoundsError(f_ion_vrvzvzeta_in))
     @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out))
     @boundscheck vperp.n == size(f_out,2) || throw(BoundsError(f_out))
     @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out))
@@ -173,6 +176,26 @@ function charge_exchange_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in,
                     - fvec_in.pdf[ivpa,ivperp,iz,ir,is]*fvec_in.density_neutral[iz,ir,isn])
         end
     end
+end
+
+function neutral_charge_exchange_collisions_3V!(f_neutral_out, f_ion_vrvzvzeta_in,
+                                                fvec_in, composition, vz, vr, vzeta, vpa,
+                                                vperp, z, r, charge_exchange_frequency,
+                                                dt)
+    # This routine assumes a 3V model with:
+    @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out))
+    @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out))
+    @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out))
+    @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out))
+    @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out))
+    @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out))
+    @boundscheck vz.n == size(f_ion_vrvzvzeta_in,1) || throw(BoundsError(f_ion_vrvzvzeta_in))
+    @boundscheck vr.n == size(f_ion_vrvzvzeta_in,2) || throw(BoundsError(f_ion_vrvzvzeta_in))
+    @boundscheck vzeta.n == size(f_ion_vrvzvzeta_in,3) || throw(BoundsError(f_ion_vrvzvzeta_in))
+    @boundscheck z.n == size(f_ion_vrvzvzeta_in,4) || throw(BoundsError(f_ion_vrvzvzeta_in))
+    @boundscheck r.n == size(f_ion_vrvzvzeta_in,5) || throw(BoundsError(f_ion_vrvzvzeta_in))
+    @boundscheck composition.n_neutral_species == size(f_ion_vrvzvzeta_in,6) || throw(BoundsError(f_ion_vrvzvzeta_in))
+
     begin_sn_r_z_vzeta_vr_vz_region()
     @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
         # apply CX collisions to all neutral species
diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl
index 5d5531d84..d034a2dd0 100644
--- a/moment_kinetics/src/coordinates.jl
+++ b/moment_kinetics/src/coordinates.jl
@@ -77,6 +77,14 @@ struct coordinate{T <: AbstractVector{mk_float}}
     scratch2::Array{mk_float,1}
     # scratch3 is an array used for intermediate calculations requiring n entries
     scratch3::Array{mk_float,1}
+    # scratch4 is an array used for intermediate calculations requiring n entries
+    scratch4::Array{mk_float,1}
+    # scratch5 is an array used for intermediate calculations requiring n entries
+    scratch5::Array{mk_float,1}
+    # scratch6 is an array used for intermediate calculations requiring n entries
+    scratch6::Array{mk_float,1}
+    # scratch7 is an array used for intermediate calculations requiring n entries
+    scratch7::Array{mk_float,1}
     # scratch_shared is a shared-memory array used for intermediate calculations requiring
     # n entries
     scratch_shared::T
@@ -221,10 +229,12 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing
     coord = coordinate(input.name, n_global, n_local, input.ngrid,
         input.nelement_global, input.nelement_local, input.nrank, input.irank, input.L, grid,
         cell_width, igrid, ielement, imin, imax, igrid_full, input.discretization, input.fd_option, input.cheb_option,
-        input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), scratch_shared, scratch_shared2,
-        scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, input.comm,
-        local_io_range, global_io_range, element_scale, element_shift, input.element_spacing_option,
-        element_boundaries, radau_first_element, other_nodes, one_over_denominator)
+        input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch),
+        copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch),
+        scratch_shared, scratch_shared2, scratch_2d, copy(scratch_2d), advection,
+        send_buffer, receive_buffer, input.comm, local_io_range, global_io_range,
+        element_scale, element_shift, input.element_spacing_option, element_boundaries,
+        radau_first_element, other_nodes, one_over_denominator)
 
     if coord.n == 1 && occursin("v", coord.name)
         spectral = null_velocity_dimension_info()
@@ -242,7 +252,8 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing
     elseif input.discretization == "gausslegendre_pseudospectral"
         # create arrays needed for explicit GaussLegendre pseudospectral treatment in this
         # coordinate and create the matrices for differentiation
-        spectral = setup_gausslegendre_pseudospectral(coord, collision_operator_dim=collision_operator_dim)
+        spectral = setup_gausslegendre_pseudospectral(coord, collision_operator_dim=collision_operator_dim,
+                                                      dirichlet_bc=occursin("zero", coord.bc))
         # obtain the local derivatives of the uniform grid with respect to the used grid
         derivative!(coord.duniform_dgrid, coord.uniform_grid, coord, spectral)
     else
diff --git a/moment_kinetics/src/derivatives.jl b/moment_kinetics/src/derivatives.jl
index e85e91158..c3e2c0523 100644
--- a/moment_kinetics/src/derivatives.jl
+++ b/moment_kinetics/src/derivatives.jl
@@ -149,6 +149,33 @@ dfns (ion) -> [vpa,vperp,z,r,s]
 dfns (neutrals) -> [vz,vr,vzeta,z,r,sn]
 """
 
+#df/dz
+#1D version for f[z], used by implicit solvers; endpoint and MPI buffers are 0-dimensional
+function derivative_z!(dfdz::AbstractArray{mk_float,1}, f::AbstractArray{mk_float,1},
+        dfdz_lower_endpoints::AbstractArray{mk_float,0},
+        dfdz_upper_endpoints::AbstractArray{mk_float,0},
+        z_send_buffer::AbstractArray{mk_float,0},
+        z_receive_buffer::AbstractArray{mk_float,0}, z_spectral, z)
+
+    begin_serial_region()
+
+    @serial_region begin
+        # differentiate f w.r.t. z, passing dfdz as the output array
+        derivative!(dfdz, f, z, z_spectral)
+        # take the external endpoint values from z.scratch_2d to reconcile via MPI
+        dfdz_lower_endpoints[] = z.scratch_2d[1,1]
+        dfdz_upper_endpoints[] = z.scratch_2d[end,end]
+    end
+
+    # if this process holds fewer z elements than the global count, reconcile element
+    # boundaries across processes; in this 1D version each buffer holds a single value
+    if z.nelement_local < z.nelement_global
+        reconcile_element_boundaries_MPI!(
+            dfdz, dfdz_lower_endpoints, dfdz_upper_endpoints, z_send_buffer,
+            z_receive_buffer, z)
+    end
+end
+
 #df/dz
 #2D version for f[z,r] -> Er, Ez, phi
 function derivative_z!(dfdz::AbstractArray{mk_float,2}, f::AbstractArray{mk_float,2},
diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl
index bae07d9bc..1aea757ce 100644
--- a/moment_kinetics/src/file_io.jl
+++ b/moment_kinetics/src/file_io.jl
@@ -55,7 +55,7 @@ moments & fields only
 struct io_moments_info{Tfile, Ttime, Tphi, Tmomi, Tmomn, Tchodura_lower,
                        Tchodura_upper, Texti1, Texti2, Texti3, Texti4,
                        Texti5, Textn1, Textn2, Textn3, Textn4, Textn5, Tconstri, Tconstrn,
-                       Tint, Tfailcause}
+                       Tint, Tfailcause, Tnldiagnostics}
     # file identifier for the binary file to which data is written
     fid::Tfile
     # handle for the time variable
@@ -126,6 +126,10 @@ struct io_moments_info{Tfile, Ttime, Tphi, Tmomi, Tmomn, Tchodura_lower,
     # Last successful timestep before most recent timestep failure, used by adaptve
     # timestepping algorithm
     dt_before_last_fail::Ttime
+    # Variables recording diagnostic information about non-linear solvers (used for
+    # implicit parts of timestep). These are stored in nested NamedTuples so that we can
+    # write diagnostics generically for as many nonlinear solvers as are created.
+    nl_solver_diagnostics::Tnldiagnostics
 
     # Use parallel I/O?
     parallel_io::Bool
@@ -194,7 +198,7 @@ open the necessary output files
 function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r,
                        composition, collisions, evolve_density, evolve_upar, evolve_ppar,
                        external_source_settings, input_dict, restart_time_index,
-                       previous_runs_info, time_for_setup)
+                       previous_runs_info, time_for_setup, t_params, nl_solver_params)
     begin_serial_region()
     @serial_region begin
         # Only read/write from first process in each 'block'
@@ -222,13 +226,14 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe
                                       external_source_settings, input_dict,
                                       io_input.parallel_io, comm_inter_block[], run_id,
                                       restart_time_index, previous_runs_info,
-                                      time_for_setup)
+                                      time_for_setup, t_params, nl_solver_params)
         io_dfns = setup_dfns_io(out_prefix, io_input.binary_format,
                                 boundary_distributions, r, z, vperp, vpa, vzeta, vr, vz,
                                 composition, collisions, evolve_density, evolve_upar,
                                 evolve_ppar, external_source_settings, input_dict,
                                 io_input.parallel_io, comm_inter_block[], run_id,
-                                restart_time_index, previous_runs_info, time_for_setup)
+                                restart_time_index, previous_runs_info, time_for_setup,
+                                t_params, nl_solver_params)
 
         return ascii, io_moments, io_dfns
     end
@@ -644,7 +649,8 @@ define dynamic (time-evolving) moment variables for writing to the hdf5 file
 function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species,
                                           r::coordinate, z::coordinate, parallel_io,
                                           external_source_settings, evolve_density,
-                                          evolve_upar, evolve_ppar)
+                                          evolve_upar, evolve_ppar, t_params,
+                                          nl_solver_params)
     @serial_region begin
         dynamic = create_io_group(fid, "dynamic_data", description="time evolving variables")
 
@@ -694,19 +700,13 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species,
             dynamic, "failure_counter", mk_int; parallel_io=parallel_io,
             description="cumulative number of timestep failures for the run")
 
-        n_failure_vars = 1 + evolve_density + evolve_upar + evolve_ppar
-        if n_neutral_species > 0
-            n_failure_vars *= 2
-        end
+        n_failure_vars = length(t_params.failure_caused_by)
         io_failure_caused_by = create_dynamic_variable!(
             dynamic, "failure_caused_by", mk_int; diagnostic_var_size=n_failure_vars,
             parallel_io=parallel_io,
             description="cumulative count of how many times each variable caused a "
                         * "timestep failure for the run")
-        n_limit_vars = 5 + 2
-        if n_neutral_species > 0
-            n_limit_vars += 2
-        end
+        n_limit_vars = length(t_params.limit_caused_by)
         io_limit_caused_by = create_dynamic_variable!(
             dynamic, "limit_caused_by", mk_int; diagnostic_var_size=n_limit_vars,
             parallel_io=parallel_io,
@@ -718,6 +718,21 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species,
             description="Last successful timestep before most recent timestep failure, "
                         * "used by adaptve timestepping algorithm")
 
+        io_nl_solver_diagnostics = NamedTuple(
+            term=>(n_solves=create_dynamic_variable!(
+                                dynamic, "$(term)_n_solves", mk_int; parallel_io=parallel_io,
+                                description="Number of nonlinear solves for $term"),
+                   nonlinear_iterations=create_dynamic_variable!(
+                                            dynamic, "$(term)_nonlinear_iterations", mk_int;
+                                            parallel_io=parallel_io,
+                                            description="Number of nonlinear iterations for $term"),
+                   linear_iterations=create_dynamic_variable!(
+                                         dynamic, "$(term)_linear_iterations", mk_int;
+                                         parallel_io=parallel_io,
+                                         description="Number of linear iterations for $term"),
+                  )
+            for (term, params) ∈ pairs(nl_solver_params) if params !== nothing)
+
         return io_moments_info(fid, io_time, io_phi, io_Er, io_Ez, io_density, io_upar,
                                io_ppar, io_pperp, io_qpar, io_vth, io_dSdt, io_chodura_lower, io_chodura_upper,
                                io_density_neutral, io_uz_neutral,
@@ -740,7 +755,8 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species,
                                neutral_constraints_C_coefficient,
                                io_time_for_run, io_step_counter, io_dt,
                                io_failure_counter, io_failure_caused_by,
-                               io_limit_caused_by, io_dt_before_last_fail, parallel_io)
+                               io_limit_caused_by, io_dt_before_last_fail, io_nl_solver_diagnostics,
+                               parallel_io)
     end
 
     # For processes other than the root process of each shared-memory group...
@@ -1073,7 +1089,8 @@ file
 """
 function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, composition,
                                        parallel_io, external_source_settings,
-                                       evolve_density, evolve_upar, evolve_ppar)
+                                       evolve_density, evolve_upar, evolve_ppar, t_params,
+                                       nl_solver_params)
 
     @serial_region begin
         io_moments = define_dynamic_moment_variables!(fid, composition.n_ion_species,
@@ -1081,7 +1098,8 @@ function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, com
                                                       parallel_io,
                                                       external_source_settings,
                                                       evolve_density, evolve_upar,
-                                                      evolve_ppar)
+                                                      evolve_ppar, t_params,
+                                                      nl_solver_params)
 
         dynamic = get_group(fid, "dynamic_data")
 
@@ -1152,7 +1170,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z
                           composition, collisions, evolve_density, evolve_upar,
                           evolve_ppar, external_source_settings, input_dict, parallel_io,
                           io_comm, run_id, restart_time_index, previous_runs_info,
-                          time_for_setup)
+                          time_for_setup, t_params, nl_solver_params)
     @serial_region begin
         moments_prefix = string(prefix, ".moments")
         if !parallel_io
@@ -1182,7 +1200,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z
         io_moments = define_dynamic_moment_variables!(
             fid, composition.n_ion_species, composition.n_neutral_species, r, z,
             parallel_io, external_source_settings, evolve_density, evolve_upar,
-            evolve_ppar)
+            evolve_ppar, t_params, nl_solver_params)
 
         close(fid)
 
@@ -1206,6 +1224,15 @@ function reopen_moments_io(file_info)
         function getvar(name)
             if name ∈ variable_list
                 return dyn[name]
+            elseif name == "nl_solver_diagnostics"
+                nl_names = (name for name ∈ variable_list
+                            if occursin("_nonlinear_iterations", name))
+                nl_prefixes = (split(name, "_nonlinear_iterations")[1]
+                               for name ∈ nl_names)
+                return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"],
+                                                 nonlinear_iterations=dyn["$(term)_nonlinear_iterations"],
+                                                 linear_iterations=dyn["$(term)_linear_iterations"])
+                                  for term ∈ nl_prefixes)
             else
                 return nothing
             end
@@ -1238,7 +1265,8 @@ function reopen_moments_io(file_info)
                                getvar("time_for_run"), getvar("step_counter"),
                                getvar("dt"), getvar("failure_counter"),
                                getvar("failure_caused_by"), getvar("limit_caused_by"),
-                               getvar("dt_before_last_fail"), parallel_io)
+                               getvar("dt_before_last_fail"),
+                               getvar("nl_solver_diagnostics"), parallel_io)
     end
 
     # For processes other than the root process of each shared-memory group...
@@ -1252,7 +1280,7 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper
                        vzeta, vr, vz, composition, collisions, evolve_density,
                        evolve_upar, evolve_ppar, external_source_settings, input_dict,
                        parallel_io, io_comm, run_id, restart_time_index,
-                       previous_runs_info, time_for_setup)
+                       previous_runs_info, time_for_setup, t_params, nl_solver_params)
 
     @serial_region begin
         dfns_prefix = string(prefix, ".dfns")
@@ -1288,7 +1316,8 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper
         ### in a struct for later access ###
         io_dfns = define_dynamic_dfn_variables!(
             fid, r, z, vperp, vpa, vzeta, vr, vz, composition, parallel_io,
-            external_source_settings, evolve_density, evolve_upar, evolve_ppar)
+            external_source_settings, evolve_density, evolve_upar, evolve_ppar, t_params,
+            nl_solver_params)
 
         close(fid)
 
@@ -1312,6 +1341,15 @@ function reopen_dfns_io(file_info)
         function getvar(name)
             if name ∈ variable_list
                 return dyn[name]
+            elseif name == "nl_solver_diagnostics"
+                nl_names = (name for name ∈ variable_list
+                            if occursin("_nonlinear_iterations", name))
+                nl_prefixes = (split(name, "_nonlinear_iterations")[1]
+                               for name ∈ nl_names)
+                return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"],
+                                                 nonlinear_iterations=dyn["$(term)_nonlinear_iterations"],
+                                                 linear_iterations=dyn["$(term)_linear_iterations"])
+                                  for term ∈ nl_prefixes)
             else
                 return nothing
             end
@@ -1346,7 +1384,8 @@ function reopen_dfns_io(file_info)
                                      getvar("dt"), getvar("failure_counter"),
                                      getvar("failure_caused_by"),
                                      getvar("limit_caused_by"),
-                                     getvar("dt_before_last_fail"), parallel_io)
+                                     getvar("dt_before_last_fail"),
+                                     getvar("nl_solver_diagnostics"), parallel_io)
 
         return io_dfns_info(fid, getvar("f"), getvar("f_neutral"), parallel_io,
                             io_moments)
@@ -1382,7 +1421,9 @@ write time-dependent moments data for ions and neutrals to the binary output fil
 """
 function write_all_moments_data_to_binary(moments, fields, t, n_ion_species,
                                           n_neutral_species, io_or_file_info_moments,
-                                          t_idx, time_for_run, t_params, r, z)
+                                          t_idx, time_for_run, t_params, nl_solver_params,
+                                          r, z)
+
     @serial_region begin
         # Only read/write from first process in each 'block'
 
@@ -1419,6 +1460,17 @@ function write_all_moments_data_to_binary(moments, fields, t, n_ion_species,
                               only_root=true)
         append_to_dynamic_var(io_moments.dt_before_last_fail,
                               t_params.dt_before_last_fail[], t_idx, parallel_io)
+        for (k,v) ∈ pairs(nl_solver_params)
+            if v === nothing
+                continue
+            end
+            append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].n_solves,
+                                  v.n_solves[], t_idx, parallel_io)
+            append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].nonlinear_iterations,
+                                  v.nonlinear_iterations[], t_idx, parallel_io)
+            append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].linear_iterations,
+                                  v.linear_iterations[], t_idx, parallel_io)
+        end
 
         closefile && close(io_moments.fid)
     end
@@ -1619,8 +1671,8 @@ binary output file
 """
 function write_all_dfns_data_to_binary(pdf, moments, fields, t, n_ion_species,
                                        n_neutral_species, io_or_file_info_dfns, t_idx,
-                                       time_for_run, t_params, r, z, vperp, vpa, vzeta, vr,
-                                       vz)
+                                       time_for_run, t_params, nl_solver_params, r, z,
+                                       vperp, vpa, vzeta, vr, vz)
     @serial_region begin
         # Only read/write from first process in each 'block'
 
@@ -1636,7 +1688,7 @@ function write_all_dfns_data_to_binary(pdf, moments, fields, t, n_ion_species,
         # This also updates the time.
         write_all_moments_data_to_binary(moments, fields, t, n_ion_species,
                                          n_neutral_species, io_dfns.io_moments, t_idx,
-                                         time_for_run, t_params, r, z)
+                                         time_for_run, t_params, nl_solver_params, r, z)
 
         # add the distribution function data at this time slice to the output file
         write_ion_dfns_data_to_binary(pdf.ion.norm, n_ion_species, io_dfns, t_idx, r, z,
@@ -1901,7 +1953,7 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor
                     #qr_neutral=nothing, qzeta_neutral=nothing,
                     vth_neutral=nothing,
                     phi=nothing, Er=nothing, Ez=nothing,
-                    istage=0, label="")
+                    istage=0, label="", t_params=nothing, nl_solver_params=())
     global debug_output_file
 
     # Only read/write from first process in each 'block'
@@ -1933,11 +1985,12 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor
                                                           r, z, false,
                                                           external_source_settings,
                                                           evolve_density, evolve_upar,
-                                                          evolve_ppar)
+                                                          evolve_ppar, t_params,
+                                                          nl_solver_params)
             io_dfns = define_dynamic_dfn_variables!(
                 fid, r, z, vperp, vpa, vzeta, vr, vz, composition.n_ion_species,
                 composition.n_neutral_species, false, external_source_settings,
-                evolve_density, evolve_upar, evolve_ppar)
+                evolve_density, evolve_upar, evolve_ppar, t_params, nl_solver_params)
 
             # create the "istage" variable, used to identify the rk stage where
             # `debug_dump()` was called
diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl
index 24a0b925f..539a5fd86 100644
--- a/moment_kinetics/src/gauss_legendre.jl
+++ b/moment_kinetics/src/gauss_legendre.jl
@@ -100,7 +100,7 @@ struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info
     Qmat::Array{mk_float,2}
 end
 
-function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true)
+function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, dirichlet_bc=true)
     lobatto = setup_gausslegendre_pseudospectral_lobatto(coord,collision_operator_dim=collision_operator_dim)
     radau = setup_gausslegendre_pseudospectral_radau(coord,collision_operator_dim=collision_operator_dim)
 
@@ -114,9 +114,9 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true)
     K_matrix = allocate_float(coord.n,coord.n)
     L_matrix = allocate_float(coord.n,coord.n)
 
-    setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M")
-    setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms")
-    setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms")
+    setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; dirichlet_bc=dirichlet_bc)
+    setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; dirichlet_bc=dirichlet_bc)
+    setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms"; dirichlet_bc=dirichlet_bc)
     mass_matrix_lu = lu(sparse(mass_matrix))
     Qmat = allocate_float(coord.ngrid,coord.ngrid)
 
@@ -835,7 +835,7 @@ where M is the mass matrix and K is the stiffness matrix.
 function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2},
                                lobatto::gausslegendre_base_info,
                                radau::gausslegendre_base_info, 
-                               coord,option)
+                               coord,option; dirichlet_bc=false)
     QQ_j = allocate_float(coord.ngrid,coord.ngrid)
     QQ_jp1 = allocate_float(coord.ngrid,coord.ngrid)
     
@@ -883,6 +883,19 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2},
             QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:]./2.0
         end
     end
+
+    if dirichlet_bc
+        # Make matrix diagonal for first/last grid points so it does not change the values
+        # there
+        if coord.irank == 0
+            QQ_global[1,:] .= 0.0
+            QQ_global[1,1] = 1.0
+        end
+        if coord.irank == coord.nrank - 1
+            QQ_global[end,:] .= 0.0
+            QQ_global[end,end] = 1.0
+        end
+    end
         
     return nothing
 end
diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index fe7661e9a..f81d28a30 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -38,7 +38,8 @@ end
 an option but known at compile time when a `time_info` struct is passed as a function
 argument.
 """
-struct time_info{Terrorsum <: Real}
+struct time_info{Terrorsum <: Real, Trkimp, Timpzero}
+    n_variables::mk_int
     nstep::mk_int
     end_time::mk_float
     dt::MPISharedArray{mk_float,1}
@@ -52,10 +53,14 @@ struct time_info{Terrorsum <: Real}
     failure_counter::Ref{mk_int}
     failure_caused_by::Vector{mk_int}
     limit_caused_by::Vector{mk_int}
+    nwrite_moments::mk_int
+    nwrite_dfns::mk_int
     moments_output_times::Vector{mk_float}
     dfns_output_times::Vector{mk_float}
     type::String
     rk_coefs::Array{mk_float,2}
+    rk_coefs_implicit::Trkimp
+    implicit_coefficient_is_zero::Timpzero
     n_rk_stages::mk_int
     rk_order::mk_int
     adaptive::Bool
@@ -69,6 +74,9 @@ struct time_info{Terrorsum <: Real}
     last_fail_proximity_factor::mk_float
     minimum_dt::mk_float
     maximum_dt::mk_float
+    implicit_ion_advance::Bool
+    implicit_vpa_advection::Bool
+    write_after_fixed_step_count::Bool
     error_sum_zero::Terrorsum
     split_operators::Bool
     steady_state_residual::Bool
@@ -87,15 +95,20 @@ mutable struct advance_info
     neutral_z_advection::Bool
     neutral_r_advection::Bool
     neutral_vz_advection::Bool
-    cx_collisions::Bool
-    cx_collisions_1V::Bool
-    ionization_collisions::Bool
-    ionization_collisions_1V::Bool
+    ion_cx_collisions::Bool
+    neutral_cx_collisions::Bool
+    ion_cx_collisions_1V::Bool
+    neutral_cx_collisions_1V::Bool
+    ion_ionization_collisions::Bool
+    neutral_ionization_collisions::Bool
+    ion_ionization_collisions_1V::Bool
+    neutral_ionization_collisions_1V::Bool
     ionization_source::Bool
     krook_collisions_ii::Bool
     explicit_weakform_fp_collisions::Bool
     external_source::Bool
-    numerical_dissipation::Bool
+    ion_numerical_dissipation::Bool
+    neutral_numerical_dissipation::Bool
     source_terms::Bool
     continuity::Bool
     force_balance::Bool
diff --git a/moment_kinetics/src/ionization.jl b/moment_kinetics/src/ionization.jl
index a4e7ac3f1..babdd5d9b 100644
--- a/moment_kinetics/src/ionization.jl
+++ b/moment_kinetics/src/ionization.jl
@@ -2,8 +2,10 @@
 """
 module ionization
 
-export ionization_collisions_1V!
-export ionization_collisions_3V!
+export ion_ionization_collisions_1V!
+export neutral_ionization_collisions_1V!
+export ion_ionization_collisions_3V!
+export neutral_ionization_collisions_3V!
 export constant_ionization_source!
 
 using ..interpolation: interpolate_to_grid_vpa!
@@ -64,18 +66,12 @@ function constant_ionization_source!(f_out, fvec_in, vpa, vperp, z, r, moments,
     end
 end 
 
-function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp, z, r,
-                                   vz_spectral, moments, composition, collisions, dt)
+function ion_ionization_collisions_1V!(f_out, fvec_in, vz, vpa, vperp, z, r, vz_spectral,
+                                       moments, composition, collisions, dt)
     # This routine assumes a 1D model with:
     # nvz = nvpa and identical vz and vpa grids 
     # nvperp = nvr = nveta = 1
     # constant charge_exchange_frequency independent of species
-    @boundscheck vpa.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out))
-    @boundscheck 1 == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out))
-    @boundscheck 1 == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out))
-    @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out))
-    @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out))
-    @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out))
     @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out))
     @boundscheck 1 == size(f_out,2) || throw(BoundsError(f_out))
     @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out))
@@ -83,8 +79,6 @@ function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp
     @boundscheck composition.n_ion_species == size(f_out,5) || throw(BoundsError(f_out))
     
     
-    # keep vpa vperp vz vr vzeta local so that
-    # vpa loop below can also be used for vz
     begin_r_z_vpa_region()
 
     if moments.evolve_density
@@ -155,21 +149,44 @@ function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp
             @loop_r_z_vpa ir iz ivpa begin
                 # apply ionization collisions to all ion species
                 f_out[ivpa,1,iz,ir,is] += dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is]
-                # apply ionization collisions to all neutral species
-                f_neutral_out[ivpa,1,1,iz,ir,isn] -= dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is]
             end
         end
     end
 end
 
-function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt)
-    # This routine assumes a 3V model with:
-    @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out))
-    @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out))
-    @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out))
+function neutral_ionization_collisions_1V!(f_neutral_out, fvec_in, vz, vpa, vperp, z, r,
+                                           vz_spectral, moments, composition, collisions, dt)
+    # This routine assumes a 1D model with:
+    # nvz = nvpa and identical vz and vpa grids
+    # nvperp = nvr = nvzeta = 1
+    # constant ionization frequency (collisions.ionization) independent of species
+    @boundscheck vpa.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out))
+    @boundscheck 1 == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out))
+    @boundscheck 1 == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out))
     @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out))
     @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out))
     @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out))
+
+    if !moments.evolve_density
+        begin_r_z_vpa_region()
+
+        @loop_s is begin
+            # ion ionisation rate =   < f_n > n_e R_ion
+            # neutral "ionisation" (depopulation) rate =   -  f_n  n_e R_ion
+            # no gyroaverage here as 1V code
+            #NB: used quasineutrality to replace electron density n_e with ion density
+            #NEEDS GENERALISATION TO n_ion_species > 1 (missing species charge: Sum_i Z_i n_i = n_e)
+            isn = is
+            @loop_r_z_vpa ir iz ivpa begin
+                # apply ionization collisions to all neutral species
+                f_neutral_out[ivpa,1,1,iz,ir,isn] -= dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is]
+            end
+        end
+    end
+end
+
+function ion_ionization_collisions_3V!(f_out, f_neutral_gav_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt)
+    # This routine assumes a 3V model with:
     @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out))
     @boundscheck vperp.n == size(f_out,2) || throw(BoundsError(f_out))
     @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out))
@@ -185,21 +202,6 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_
     
     begin_s_r_z_vperp_vpa_region()
 
-    #    #if collisions.constant_ionization_rate
-    #    #    ## Oddly the test in test/harrisonthompson.jl matches the analitical
-    #    #    ## solution (which assumes width=0.0) better with width=0.5 than with,
-    #    #    ## e.g., width=0.15. Possibly narrower widths would require more vpa
-    #    #    ## resolution, which then causes crashes due to overshoots giving
-    #    #    ## negative f??
-    #    #    #width = 0.5
-    #    #    #@loop_s is begin
-    #    #    #    #@loop_r_z_vperp_vpa ir iz ivperp ivpa begin
-    #    #    #    #    #f_out[ivpa,ivperp,iz,ir,is] += dt*collisions.ionization/width^3*exp(-((vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)/width^2))
-    #    #    #    #end
-    #    #    #end
-    #    #    #return nothing
-    #    #end
-
     # ion ionization rate =   < f_n > n_e R_ion
     # neutral "ionization" (depopulation) rate =   -  f_n  n_e R_ion
     #NB: used quasineutrality to replace electron density n_e with ion density
@@ -213,6 +215,23 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_
             end
         end
     end
+end
+
+function neutral_ionization_collisions_3V!(f_neutral_out, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt)
+    # This routine assumes a 3V model with:
+    @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out))
+    @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out))
+    @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out))
+    @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out))
+    @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out))
+    @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out))
+
+    ionization_frequency = collisions.ionization
+
+    # ion ionization rate =   < f_n > n_e R_ion
+    # neutral "ionization" (depopulation) rate =   -  f_n  n_e R_ion
+    #NB: used quasineutrality to replace electron density n_e with ion density
+    #NEEDS GENERALISATION TO n_ion_species > 1 (missing species charge: Sum_i Z_i n_i = n_e)
     begin_sn_r_z_vzeta_vr_vz_region()
     @loop_sn isn begin
         for is ∈ 1:composition.n_ion_species
@@ -222,7 +241,6 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_
             end
         end
     end
-
 end
 
 end
diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl
index 91992e9c6..430edb5a9 100644
--- a/moment_kinetics/src/load_data.jl
+++ b/moment_kinetics/src/load_data.jl
@@ -2722,6 +2722,9 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin
         vz_chunk_size = 1
     end
 
+    # Get variable names just from the first restart, for simplicity
+    variable_names = get_variable_keys(get_group(fids0[1], "dynamic_data"))
+
     if parallel_io
         files = fids0
     else
@@ -2733,9 +2736,9 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin
     run_info = (run_name=run_name, run_prefix=base_prefix, parallel_io=parallel_io,
                 ext=ext, nblocks=nblocks, files=files, input=input,
                 n_ion_species=n_ion_species, n_neutral_species=n_neutral_species,
-                evolve_moments=evolve_moments, composition=composition, species=species,
-                collisions=collisions, geometry=geometry, drive_input=drive_input,
-                num_diss_params=num_diss_params,
+                evolve_moments=evolve_moments, t_input=t_input, composition=composition,
+                species=species, collisions=collisions, geometry=geometry,
+                drive_input=drive_input, num_diss_params=num_diss_params,
                 external_source_settings=external_source_settings,
                 evolve_density=evolve_density, evolve_upar=evolve_upar,
                 evolve_ppar=evolve_ppar,
@@ -2749,7 +2752,8 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin
                 vz_spectral=vz_spectral, r_chunk_size=r_chunk_size,
                 z_chunk_size=z_chunk_size, vperp_chunk_size=vperp_chunk_size,
                 vpa_chunk_size=vpa_chunk_size, vzeta_chunk_size=vzeta_chunk_size,
-                vr_chunk_size=vr_chunk_size, vz_chunk_size=vz_chunk_size, dfns=dfns)
+                vr_chunk_size=vr_chunk_size, vz_chunk_size=vz_chunk_size,
+                variable_names=variable_names, dfns=dfns)
 
     return run_info
 end
@@ -3283,6 +3287,11 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
 
         speed = allocate_float(nz, nvpa, nvperp, nr, nspecies, nt)
         Er = get_variable(run_info, "Er")
+        gEr = allocate_float(nvperp, nz, nr, nspecies, nt)
+        for it ∈ 1:nt, is ∈ 1:nspecies, ir ∈ 1:nr, iz ∈ 1:nz
+            # Don't support gyroaveraging here (yet)
+            gEr[:,iz,ir,is,it] .= Er[iz,ir,it]
+        end
 
         setup_distributed_memory_MPI(1,1,1,1)
         setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz,
@@ -3293,11 +3302,11 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
             # Only need some struct with a 'speed' variable
             advect = (speed=@view(speed[:,:,:,:,is,it]),)
             # Only need Er
-            fields = (Er=@view(Er[:,:,it]),)
+            fields = (gEr=@view(gEr[:,:,:,is,it]),)
             @views update_speed_z!(advect, upar[:,:,is,it], vth[:,:,is,it],
                                    run_info.evolve_upar, run_info.evolve_ppar, fields,
                                    run_info.vpa, run_info.vperp, run_info.z, run_info.r,
-                                   run_info.time[it], run_info.geometry)
+                                   run_info.time[it], run_info.geometry, is)
         end
 
         # Horrible hack so that we can get the speed back without rearranging the
@@ -3331,9 +3340,6 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
             end
         end
     elseif variable_name == "vpa_advect_speed"
-        # update_speed_z!() requires all dimensions to be present, so do *not* pass kwargs
-        # to get_variable() in this case. Instead select a slice of the result.
-        Ez = get_variable(run_info, "Ez")
         density = get_variable(run_info, "density")
         upar = get_variable(run_info, "parallel_flow")
         ppar = get_variable(run_info, "parallel_pressure")
@@ -3347,9 +3353,21 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
         dqpar_dz = get_z_derivative(run_info, "parallel_heat_flux")
         if run_info.external_source_settings.ion.active
             external_source_amplitude = get_variable(run_info, "external_source_amplitude")
-            external_source_density_amplitude = get_variable(run_info, "external_source_density_amplitude")
-            external_source_momentum_amplitude = get_variable(run_info, "external_source_momentum_amplitude")
-            external_source_pressure_amplitude = get_variable(run_info, "external_source_pressure_amplitude")
+            if run_info.evolve_density
+                external_source_density_amplitude = get_variable(run_info, "external_source_density_amplitude")
+            else
+                external_source_density_amplitude = zeros(0,0,run_info.nt)
+            end
+            if run_info.evolve_upar
+                external_source_momentum_amplitude = get_variable(run_info, "external_source_momentum_amplitude")
+            else
+                external_source_momentum_amplitude = zeros(0,0,run_info.nt)
+            end
+            if run_info.evolve_ppar
+                external_source_pressure_amplitude = get_variable(run_info, "external_source_pressure_amplitude")
+            else
+                external_source_pressure_amplitude = zeros(0,0,run_info.nt)
+            end
         else
             external_source_amplitude = zeros(0,0,run_info.nt)
             external_source_density_amplitude = zeros(0,0,run_info.nt)
@@ -3361,6 +3379,15 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
         nvperp = run_info.vperp.n
         nvpa = run_info.vpa.n
 
+        # update_speed_z!() requires all dimensions to be present, so do *not* pass kwargs
+        # to get_variable() in this case. Instead select a slice of the result.
+        Ez = get_variable(run_info, "Ez")
+        gEz = allocate_float(nvperp, nz, nr, nspecies, nt)
+        for it ∈ 1:nt, is ∈ 1:nspecies, ir ∈ 1:nr, iz ∈ 1:nz
+            # Don't support gyroaveraging here (yet)
+            gEz[:,iz,ir,is,it] .= Ez[iz,ir,it]
+        end
+
         speed=allocate_float(nvpa, nvperp, nz, nr, nspecies, nt)
         setup_distributed_memory_MPI(1,1,1,1)
         setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz,
@@ -3371,7 +3398,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
             # Only need some struct with a 'speed' variable
             advect = [(speed=@view(speed[:,:,:,:,is,it]),) for is ∈ 1:nspecies]
             # Only need Ez
-            fields = (Ez=@view(Ez[:,:,it]),)
+            fields = (gEz=@view(gEz[:,:,:,:,it]),)
             @views moments = (ion=(dppar_dz=dppar_dz[:,:,:,it],
                                    dupar_dz=dupar_dz[:,:,:,it],
                                    dvth_dz=dvth_dz[:,:,:,it],
@@ -3411,6 +3438,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
 
         speed = allocate_float(nz, nvz, nvr, nvzeta, nr, nspecies, nt)
 
+        setup_distributed_memory_MPI(1,1,1,1)
         setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz,
                            vperp=run_info.vperp.n, vpa=run_info.vpa.n, vzeta=nvzeta,
                            vr=nvr, vz=nvz)
@@ -3474,9 +3502,21 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
         dqz_dz = get_z_derivative(run_info, "qz_neutral")
         if run_info.external_source_settings.neutral.active
             external_source_amplitude = get_variable(run_info, "external_source_neutral_amplitude")
-            external_source_density_amplitude = get_variable(run_info, "external_source_neutral_density_amplitude")
-            external_source_momentum_amplitude = get_variable(run_info, "external_source_neutral_momentum_amplitude")
-            external_source_pressure_amplitude = get_variable(run_info, "external_source_neutral_pressure_amplitude")
+            if run_info.evolve_density
+                external_source_density_amplitude = get_variable(run_info, "external_source_neutral_density_amplitude")
+            else
+                external_source_density_amplitude = zeros(0,0,run_info.nt)
+            end
+            if run_info.evolve_upar
+                external_source_momentum_amplitude = get_variable(run_info, "external_source_neutral_momentum_amplitude")
+            else
+                external_source_momentum_amplitude = zeros(0,0,run_info.nt)
+            end
+            if run_info.evolve_ppar
+                external_source_pressure_amplitude = get_variable(run_info, "external_source_neutral_pressure_amplitude")
+            else
+                external_source_pressure_amplitude = zeros(0,0,run_info.nt)
+            end
         else
             external_source_amplitude = zeros(0,0,run_info.nt)
             external_source_density_amplitude = zeros(0,0,run_info.nt)
@@ -3490,6 +3530,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
         nvz = run_info.vz.n
         speed = allocate_float(nvz, nvr, nvzeta, nz, nr, nspecies, nt)
 
+        setup_distributed_memory_MPI(1,1,1,1)
         setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz,
                            vperp=run_info.vperp.n, vpa=run_info.vpa.n, vzeta=nvzeta,
                            vr=nvr, vz=nvz)
@@ -3676,6 +3717,20 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
             variable[it] = min_CFL
         end
         variable = select_slice_of_variable(variable; kwargs...)
+    elseif occursin("_nonlinear_iterations_per_solve", variable_name)
+        prefix = split(variable_name, "_nonlinear_iterations_per_solve")[1]
+        nl_nsolves = get_per_step_from_cumulative_variable(
+            run_info, "$(prefix)_n_solves"; kwargs...)
+        nl_iterations = get_per_step_from_cumulative_variable(
+            run_info, "$(prefix)_nonlinear_iterations"; kwargs...)
+        variable = nl_iterations ./ nl_nsolves
+    elseif occursin("_linear_iterations_per_nonlinear_iteration", variable_name)
+        prefix = split(variable_name, "_linear_iterations_per_nonlinear_iteration")[1]
+        nl_iterations = get_per_step_from_cumulative_variable(
+            run_info, "$(prefix)_nonlinear_iterations"; kwargs...)
+        nl_linear_iterations = get_per_step_from_cumulative_variable(
+            run_info, "$(prefix)_linear_iterations"; kwargs...)
+        variable = nl_linear_iterations ./ nl_iterations
     else
         variable = postproc_load_variable(run_info, variable_name; kwargs...)
     end
diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl
index ae49821dd..f8c0a2274 100644
--- a/moment_kinetics/src/moment_constraints.jl
+++ b/moment_kinetics/src/moment_constraints.jl
@@ -7,6 +7,7 @@ module moment_constraints
 
 using ..communication: _block_synchronize
 using ..looping
+using ..type_definitions: mk_float
 using ..velocity_moments: integrate_over_vspace, update_qpar!
 
 export hard_force_moment_constraints!, hard_force_moment_constraints_neutral!
@@ -75,12 +76,26 @@ function hard_force_moment_constraints!(f, moments, vpa)
         A = 1.0 / I0
         @. f1d = A * f1d
 
+        B = NaN
+        C = NaN
+    else
+        A = NaN
         B = NaN
         C = NaN
     end
 
     return A, B, C
 end
+function hard_force_moment_constraints!(f::AbstractArray{mk_float,5}, moments, vpa)
+    ion = moments.ion  # the coefficients used at each [iz,ir,is] point are stored here
+    begin_s_r_z_region()
+    @loop_s_r_z is ir iz begin
+        A, B, C = hard_force_moment_constraints!(@view(f[:,:,iz,ir,is]), moments, vpa)
+        ion.constraints_A_coefficient[iz,ir,is] = A
+        ion.constraints_B_coefficient[iz,ir,is] = B
+        ion.constraints_C_coefficient[iz,ir,is] = C
+    end
+end
 
 """
     hard_force_moment_constraints_neutral!(f, moments, vz)
@@ -125,6 +140,88 @@ function hard_force_moment_constraints_neutral!(f, moments, vz)
         A = 1.0 / I0
         @. f1d = A * f1d
 
+        B = NaN
+        C = NaN
+    else
+        A = NaN
+        B = NaN
+        C = NaN
+    end
+
+    return A, B, C
+end
+function hard_force_moment_constraints_neutral!(f::AbstractArray{mk_float,6}, moments, vz)
+    A = moments.neutral.constraints_A_coefficient  # coefficient arrays, indexed [iz,ir,isn]
+    B = moments.neutral.constraints_B_coefficient
+    C = moments.neutral.constraints_C_coefficient
+    begin_sn_r_z_region()
+    @loop_sn_r_z isn ir iz begin
+        A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn] =  # species index of the view must be `isn`
+            hard_force_moment_constraints_neutral!(@view(f[:,:,:,iz,ir,isn]), moments, vz)
+    end
+end
+
+"""
+    moment_constraints_on_residual!(residual, f, moments, vpa)
+
+A 'residual' (used in implicit timestepping) is an update to the distribution function
+\$f_\\mathrm{new} = f_\\mathrm{old} + \\mathtt{residual}\$. \$f_\\mathrm{new}\$ should
+obey the moment constraints ([Constraints on normalized distribution function](@ref)), and
+\$f_\\mathrm{old}\$ already obeys the constraints, which means that the first 3 moments of
+`residual` should be zero. We impose this constraint by adding corrections proportional to
+`f`.
+```math
+r = \\hat{r} + (A + B w_{\\|} + C w_{\\|}^2) f
+```
+
+Note this function assumes the input is given at a single spatial position.
+"""
+function moment_constraints_on_residual!(residual::AbstractArray{T,N},
+                                         f::AbstractArray{T,N}, moments, vpa) where {T,N}
+    if N == 2  # for 2D input only the first column is used
+        f = @view f[:,1]
+        residual = @view residual[:,1]
+    end
+    if moments.evolve_ppar
+        I0 = integrate_over_vspace(f, vpa.wgts)
+        I1 = integrate_over_vspace(f, vpa.grid, vpa.wgts)
+        I2 = integrate_over_vspace(f, vpa.grid, 2, vpa.wgts)
+        I3 = integrate_over_vspace(f, vpa.grid, 3, vpa.wgts)
+        I4 = integrate_over_vspace(f, vpa.grid, 4, vpa.wgts)
+        J0 = integrate_over_vspace(residual, vpa.wgts)
+        J1 = integrate_over_vspace(residual, vpa.grid, vpa.wgts)
+        J2 = integrate_over_vspace(residual, vpa.grid, 2, vpa.wgts)
+
+        A = ((I2*J2 - J0*I4)*(I2*I4 - I3^2) + (I2*I3 - I1*I4)*(J2*I3 - J1*I4)) /
+            ((I0*I4 - I2^2)*(I2*I4 - I3^2) - (I2*I3 - I1*I4)^2)
+        B = (J2*I3 - J1*I4 + (I2*I3 - I1*I4)*A) / (I2*I4 - I3^2)
+        C = -(J2 + I2*A + I3*B) / I4
+
+        @. residual = residual + (A + B*vpa.grid + C*vpa.grid*vpa.grid) * f
+    elseif moments.evolve_upar
+        I0 = integrate_over_vspace(f, vpa.wgts)
+        I1 = integrate_over_vspace(f, vpa.grid, vpa.wgts)
+        I2 = integrate_over_vspace(f, vpa.grid, 2, vpa.wgts)
+        J0 = integrate_over_vspace(residual, vpa.wgts)
+        J1 = integrate_over_vspace(residual, vpa.grid, vpa.wgts)
+
+        A = (I1*J1 - J0*I2) / (I0*I2 - I1^2)
+        B = -(J1 + I1*A) / I2
+
+        @. residual = residual + (A + B*vpa.grid) * f
+
+        C = NaN
+    elseif moments.evolve_density
+        I0 = integrate_over_vspace(f, vpa.wgts)
+        J0 = integrate_over_vspace(residual, vpa.wgts)
+        A = -J0 / I0
+        # Only `residual` is corrected here; `f` is an input and must be left unchanged.
+        @. residual = residual + A * f
+
+        B = NaN
+        C = NaN
+    else
+        A = NaN
         B = NaN
         C = NaN
     end
diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl
index b1a07f0eb..90fd7f1bd 100644
--- a/moment_kinetics/src/moment_kinetics.jl
+++ b/moment_kinetics/src/moment_kinetics.jl
@@ -33,6 +33,7 @@ include("input_structs.jl")
 include("runge_kutta.jl")
 include("reference_parameters.jl")
 include("coordinates.jl")
+include("nonlinear_solvers.jl")
 include("file_io.jl")
 include("geo.jl")
 include("gyroaverages.jl")
@@ -46,6 +47,7 @@ include("moment_constraints.jl")
 include("fokker_planck_test.jl")
 include("fokker_planck_calculus.jl")
 include("fokker_planck.jl")
+include("boundary_conditions.jl")
 include("advection.jl")
 include("vpa_advection.jl")
 include("z_advection.jl")
@@ -54,7 +56,6 @@ include("vperp_advection.jl")
 include("neutral_r_advection.jl")
 include("neutral_z_advection.jl")
 include("neutral_vz_advection.jl")
-include("boundary_conditions.jl")
 include("charge_exchange.jl")
 include("ionization.jl")
 include("krook_collisions.jl")
@@ -318,14 +319,14 @@ function setup_moment_kinetics(input_dict::AbstractDict;
     # create arrays and do other work needed to setup
     # the main time advance loop -- including normalisation of f by density if requested
 
-    moments, spectral_objects, scratch, advance, t_params, fp_arrays, gyroavs,
-    manufactured_source_list =
+    moments, spectral_objects, scratch, scratch_implicit, advance, advance_implicit,
+    t_params, fp_arrays, gyroavs, manufactured_source_list, nl_solver_params =
         setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrophase,
             vz_spectral, vr_spectral, vzeta_spectral, vpa_spectral, vperp_spectral,
             z_spectral, r_spectral, composition, moments, t_input, code_time, dt,
             dt_before_last_fail, collisions, species, geometry, boundary_distributions,
             external_source_settings, num_diss_params, manufactured_solns_input,
-            advection_structs, scratch_dummy, restarting)
+            advection_structs, scratch_dummy, restarting, input_dict)
 
     # This is the closest we can get to the end time of the setup before writing it to the
     # output file
@@ -335,26 +336,28 @@ function setup_moment_kinetics(input_dict::AbstractDict;
     ascii_io, io_moments, io_dfns = setup_file_io(io_input, boundary_distributions, vz,
         vr, vzeta, vpa, vperp, z, r, composition, collisions, moments.evolve_density,
         moments.evolve_upar, moments.evolve_ppar, external_source_settings, input_dict,
-        restart_time_index, previous_runs_info, time_for_setup)
+        restart_time_index, previous_runs_info, time_for_setup, t_params,
+        nl_solver_params)
     # write initial data to ascii files
     write_data_to_ascii(pdf, moments, fields, vpa, vperp, z, r, code_time,
         composition.n_ion_species, composition.n_neutral_species, ascii_io)
     # write initial data to binary files
 
     write_all_moments_data_to_binary(moments, fields, code_time,
-        composition.n_ion_species, composition.n_neutral_species, io_moments, 1, 0.0, t_params, r,
-        z)
+        composition.n_ion_species, composition.n_neutral_species, io_moments, 1, 0.0,
+        t_params, nl_solver_params, r, z)
     write_all_dfns_data_to_binary(pdf, moments, fields, code_time,
         composition.n_ion_species, composition.n_neutral_species, io_dfns, 1, 0.0,
-        t_params, r, z, vperp, vpa, vzeta, vr, vz)
+        t_params, nl_solver_params, r, z, vperp, vpa, vzeta, vr, vz)
 
     begin_s_r_z_vperp_region()
 
-    return pdf, scratch, code_time, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
-           moments, fields, spectral_objects, advection_structs,
+    return pdf, scratch, scratch_implicit, code_time, t_params, vz, vr, vzeta, vpa, vperp,
+           gyrophase, z, r, moments, fields, spectral_objects, advection_structs,
            composition, collisions, geometry, gyroavs, boundary_distributions,
-           external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy,
-           manufactured_source_list, ascii_io, io_moments, io_dfns
+           external_source_settings, num_diss_params, nl_solver_params, advance,
+           advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, ascii_io,
+           io_moments, io_dfns
 end
 
 """
diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl
index 56358a27b..82b8b00fb 100644
--- a/moment_kinetics/src/moment_kinetics_input.jl
+++ b/moment_kinetics/src/moment_kinetics_input.jl
@@ -211,6 +211,9 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
         last_fail_proximity_factor=1.05,
         minimum_dt=0.0,
         maximum_dt=Inf,
+        implicit_ion_advance=true,
+        implicit_vpa_advection=false,
+        write_after_fixed_step_count=false,
         high_precision_error_sum=false,
        )
     if timestepping_section["nwrite"] > timestepping_section["nstep"]
@@ -224,38 +227,37 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
     if timestepping_section["atol_upar"] === nothing
         timestepping_section["atol_upar"] = 1.0e-2 * timestepping_section["rtol"]
     end
-    timestepping_input = Dict_to_NamedTuple(timestepping_section)
-    if !(0.0 < timestepping_input.step_update_prefactor < 1.0)
-        error("step_update_prefactor=$(timestepping_input.step_update_prefactor) must "
+    if !(0.0 < timestepping_section["step_update_prefactor"] < 1.0)
+        error("step_update_prefactor=$(timestepping_section["step_update_prefactor"]) must "
               * "be between 0.0 and 1.0.")
     end
-    if timestepping_input.max_increase_factor ≤ 1.0
-        error("max_increase_factor=$(timestepping_input.max_increase_factor) must "
+    if timestepping_section["max_increase_factor"] ≤ 1.0
+        error("max_increase_factor=$(timestepping_section["max_increase_factor"]) must "
               * "be greater than 1.0.")
     end
-    if timestepping_input.max_increase_factor_near_last_fail ≤ 1.0
+    if timestepping_section["max_increase_factor_near_last_fail"] ≤ 1.0
         error("max_increase_factor_near_last_fail="
-              * "$(timestepping_input.max_increase_factor_near_last_fail) must be "
+              * "$(timestepping_section["max_increase_factor_near_last_fail"]) must be "
               * "greater than 1.0.")
     end
-    if !isinf(timestepping_input.max_increase_factor_near_last_fail) &&
-            timestepping_input.max_increase_factor_near_last_fail > timestepping_input.max_increase_factor
+    if !isinf(timestepping_section["max_increase_factor_near_last_fail"]) &&
+        timestepping_section["max_increase_factor_near_last_fail"] > timestepping_section["max_increase_factor"]
         error("max_increase_factor_near_last_fail="
-              * "$(timestepping_input.max_increase_factor_near_last_fail) should be "
+              * "$(timestepping_section["max_increase_factor_near_last_fail"]) should be "
               * "less than max_increase_factor="
-              * "$(timestepping_input.max_increase_factor).")
+              * "$(timestepping_section["max_increase_factor"]).")
     end
-    if timestepping_input.last_fail_proximity_factor ≤ 1.0
+    if timestepping_section["last_fail_proximity_factor"] ≤ 1.0
         error("last_fail_proximity_factor="
-              * "$(timestepping_input.last_fail_proximity_factor) must be "
+              * "$(timestepping_section["last_fail_proximity_factor"]) must be "
               * "greater than 1.0.")
     end
-    if timestepping_input.minimum_dt > timestepping_input.maximum_dt
-        error("minimum_dt=$(timestepping_input.minimum_dt) must be less than "
-              * "maximum_dt=$(timestepping_input.maximum_dt)")
+    if timestepping_section["minimum_dt"] > timestepping_section["maximum_dt"]
+        error("minimum_dt=$(timestepping_section["minimum_dt"]) must be less than "
+              * "maximum_dt=$(timestepping_section["maximum_dt"])")
     end
-    if timestepping_input.maximum_dt ≤ 0.0
-        error("maximum_dt=$(timestepping_input.maximum_dt) must be positive")
+    if timestepping_section["maximum_dt"] ≤ 0.0
+        error("maximum_dt=$(timestepping_section["maximum_dt"]) must be positive")
     end
 
     use_for_init_is_default = !(("manufactured_solns" ∈ keys(scan_input)) &&
@@ -662,12 +664,12 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
     end
 
     # check input (and initialized coordinate structs) to catch errors/unsupported options
-    check_input(io, output_dir, timestepping_input.nstep, timestepping_input.dt, r, z,
+    check_input(io, output_dir, timestepping_section["nstep"], timestepping_section["dt"], r, z,
                 vpa, vperp, composition, species_immutable, evolve_moments,
                 num_diss_params, save_inputs_to_txt, collisions)
 
     # return immutable structs for z, vpa, species and composition
-    all_inputs = (io_immutable, evolve_moments, timestepping_input, z, z_spectral, r,
+    all_inputs = (io_immutable, evolve_moments, timestepping_section, z, z_spectral, r,
                   r_spectral, vpa, vpa_spectral, vperp, vperp_spectral, gyrophase,
                   gyrophase_spectral, vz, vz_spectral, vr, vr_spectral, vzeta,
                   vzeta_spectral, composition, species_immutable, collisions, geometry,
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
new file mode 100644
index 000000000..79a87fc09
--- /dev/null
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -0,0 +1,932 @@
+"""
+Nonlinear solvers, using Jacobian-free Newton-Krylov methods.
+
+These solvers use an outer Newton iteration. Each step of the Newton iteration requires a
+linear solve of the Jacobian. An 'inexact Jacobian' method is used, and the GMRES method
+(GMRES is a type of Krylov solver) is used to (approximately) solve the (approximate)
+linear system.
+
+!!! warning "parallelisation"
+    This module uses shared- and distributed-memory parallelism, so the functions in it
+    should not be called inside any kind of parallelised loop. This restriction should be
+    lifted somehow in future...
+
+`parallel_map()` is used to apply elementwise functions to arbitrary numbers of arguments
+using shared-memory parallelism. We do this rather than writing the loops out explicitly
+so that `newton_solve!()` and `linear_solve!()` can work for arrays with any combination
+of dimensions.
+
+Useful references:
+[1] V.A. Mousseau and D.A. Knoll, "Fully Implicit Kinetic Solution of Collisional Plasmas", Journal of Computational Physics 136, 308–323 (1997), https://doi.org/10.1006/jcph.1997.5736.
+[2] V.A. Mousseau, "Fully Implicit Kinetic Modelling of Collisional Plasmas", PhD thesis, Idaho National Engineering Laboratory (1996), https://inis.iaea.org/collection/NCLCollectionStore/_Public/27/067/27067141.pdf.
+[3] https://en.wikipedia.org/wiki/Generalized_minimal_residual_method
+[4] https://www.rikvoorhaar.com/blog/gmres
+[5] E. Carson, J. Liesen, Z. Strakoš, "Towards understanding CG and GMRES through examples", Linear Algebra and its Applications 692, 241–291 (2024), https://doi.org/10.1016/j.laa.2024.04.003.
+"""
+module nonlinear_solvers
+
+export setup_nonlinear_solve, gather_nonlinear_solver_counters!,
+       reset_nonlinear_per_stage_counters, newton_solve!
+
+using ..array_allocation: allocate_float, allocate_shared_float
+using ..communication
+using ..coordinates: coordinate
+using ..input_structs
+using ..looping
+using ..type_definitions: mk_float, mk_int
+
+using LinearAlgebra
+using MINPACK
+using MPI
+using SparseArrays
+
+struct nl_solver_info{TH,TV,Tlig,Tprecon}
+    rtol::mk_float  # tolerances used for the outer Newton iteration norm
+    atol::mk_float
+    nonlinear_max_iterations::mk_int
+    linear_rtol::mk_float  # linear_* settings are passed on to linear_solve!()
+    linear_atol::mk_float
+    linear_restart::mk_int
+    linear_max_restarts::mk_int
+    H::TH  # buffer of size (linear_restart+1, linear_restart)
+    V::TV  # buffer whose last dimension has size linear_restart+1
+    linear_initial_guess::Tlig
+    n_solves::Base.RefValue{mk_int}  # concrete Ref type, as created by `Ref(0)`
+    nonlinear_iterations::Base.RefValue{mk_int}  # local totals, added to by newton_solve!()
+    linear_iterations::Base.RefValue{mk_int}
+    global_n_solves::Base.RefValue{mk_int}  # set by gather_nonlinear_solver_counters!()
+    global_nonlinear_iterations::Base.RefValue{mk_int}
+    global_linear_iterations::Base.RefValue{mk_int}
+    stage_counter::Base.RefValue{mk_int}
+    serial_solve::Bool
+    max_nonlinear_iterations_this_step::Base.RefValue{mk_int}
+    preconditioner_update_interval::mk_int
+    preconditioners::Tprecon  # array of LU objects, or `nothing` if no preconditioner
+end
+
+"""
+
+`coords` is a NamedTuple of coordinates corresponding to the dimensions of the variable
+that will be solved. The entries in `coords` should be ordered the same as the memory
+layout of the variable to be solved (i.e. fastest-varying first).
+
+The nonlinear solver will be called inside a loop over `outer_coords`, so we might need
+for example a preconditioner object for each point in that outer loop.
+"""
+function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol=1.0e-5,
+                               default_atol=1.0e-12, serial_solve=false,
+                               preconditioner_type=nothing)
+    nl_solver_section = set_defaults_and_check_section!(
+        input_dict, "nonlinear_solver";
+        rtol=default_rtol,
+        atol=default_atol,
+        nonlinear_max_iterations=20,
+        linear_rtol=1.0e-3,
+        linear_atol=1.0,
+        linear_restart=10,
+        linear_max_restarts=0,
+        preconditioner_update_interval=300,
+       )
+    nl_solver_input = Dict_to_NamedTuple(nl_solver_section)
+    # Entries of coords/outer_coords are `coordinate` structs (size `.n`) or plain sizes.
+    coord_sizes = Tuple(isa(c, coordinate) ? c.n : c for c ∈ coords)
+    total_size_coords = prod(coord_sizes)
+    outer_coord_sizes = Tuple(isa(c, coordinate) ? c.n : c for c ∈ outer_coords)
+    # NOTE(review): V's dimensions use reverse(coord_sizes); confirm the order of `coords`.
+    linear_restart = nl_solver_input.linear_restart
+    # `serial_solve` selects allocate_float() buffers instead of allocate_shared_float().
+    if serial_solve
+        H = allocate_float(linear_restart + 1, linear_restart)
+        V = allocate_float(reverse(coord_sizes)..., linear_restart+1)
+        H .= 0.0
+        V .= 0.0
+    else
+        H = allocate_shared_float(linear_restart + 1, linear_restart)
+        V = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1)
+
+        begin_serial_region()
+        @serial_region begin
+            H .= 0.0
+            V .= 0.0
+        end
+    end
+
+    if preconditioner_type == "lu"
+        # Create dummy LU solver objects so we can create an array for preconditioners.
+        # These will be calculated properly within the time loop.
+        preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)),
+                               reverse(outer_coord_sizes))  # same dummy in every entry
+    else
+        preconditioners = nothing
+    end
+
+    linear_initial_guess = zeros(linear_restart)
+    # All counters start at zero; arguments follow the field order of `nl_solver_info`.
+    return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol,
+                          nl_solver_input.nonlinear_max_iterations,
+                          nl_solver_input.linear_rtol, nl_solver_input.linear_atol,
+                          linear_restart, nl_solver_input.linear_max_restarts, H, V,
+                          linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
+                          Ref(0), Ref(0), serial_solve, Ref(0),
+                          nl_solver_input.preconditioner_update_interval, preconditioners)
+end
+
+"""
+    reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing})
+
+Zero the `max_nonlinear_iterations_this_step` counter; does nothing if passed `nothing`.
+"""
+function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing})
+    # `nothing` means there is no solver object, so there is no counter to reset.
+    if nl_solver_params !== nothing
+        # The maximum is accumulated afresh, starting from zero.
+        nl_solver_params.max_nonlinear_iterations_this_step[] = 0
+    end
+
+    return nothing
+end
+
+"""
+    gather_nonlinear_solver_counters!(nl_solver_params)
+
+Where necessary, gather the iteration counters for the nonlinear solvers.
+
+Where each solve runs in parallel using all processes, this is unnecessary as the count on
+each process already represents the global count. Where each solve uses only a subset of
+processes, the counters from different solves need to be added together to get the global
+total.
+"""
+function gather_nonlinear_solver_counters!(nl_solver_params)
+    ion_adv = nl_solver_params.ion_advance
+    if ion_adv !== nothing  # solve runs on all processes, so local counts are global
+        ion_adv.global_n_solves[] = ion_adv.n_solves[]
+        ion_adv.global_nonlinear_iterations[] = ion_adv.nonlinear_iterations[]
+        ion_adv.global_linear_iterations[] = ion_adv.linear_iterations[]
+    end
+    vpa_adv = nl_solver_params.vpa_advection
+    if vpa_adv !== nothing  # serial solves on separate processes: sum with Allreduce
+        vpa_adv.global_n_solves[] = MPI.Allreduce(vpa_adv.n_solves[], +, comm_world)
+        vpa_adv.global_nonlinear_iterations[] = MPI.Allreduce(vpa_adv.nonlinear_iterations[], +, comm_world)
+        vpa_adv.global_linear_iterations[] = MPI.Allreduce(vpa_adv.linear_iterations[], +, comm_world)
+    end
+end
+
+"""
+    newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params;
+                  left_preconditioner=nothing, right_preconditioner=nothing, coords)
+
+`x` is the initial guess at the solution, and is overwritten by the result of the Newton
+solve.
+
+`residual_func!(residual, x)` is the function we are trying to find a root of. It calculates
+```math
+\\mathtt{residual} = F(\\mathtt{x})
+```
+where we are trying to solve \$F(x)=0\$.
+
+`residual`, `delta_x`, `rhs_delta`, `v` and `w` are buffer arrays, with the same size as
+`x`, used internally.
+
+`left_preconditioner` or `right_preconditioner` apply preconditioning. They should be
+passed a function that solves \$P.x = b\$ where \$P\$ is the preconditioner matrix, \$b\$
+is given by the values passed to the function as the argument, and the result \$x\$ is
+returned by overwriting the argument.
+
+`coords` is a NamedTuple containing the `coordinate` structs corresponding to each
+dimension in `x`.
+
+
+Tolerances
+----------
+
+Note that the meaning of the relative tolerance `rtol` and absolute tolerance `atol` is
+very different for the outer Newton iteration and the inner GMRES iteration.
+
+For the outer Newton iteration the residual \$R(x^n)\$ measures the departure of the
+system from the solution (at each grid point). Its size can be compared to the size of the
+solution `x`, so it makes sense to define an 'error norm' for \$R(x^n)\$ as
+```math
+E(x^n) = \\left\\lVert \\frac{R(x^n)}{\\mathtt{rtol} \\left|x^n\\right| + \\mathtt{atol}} \\right\\rVert_2
+```
+where \$\\left\\lVert \\cdot \\right\\rVert\$ is the 'L2 norm' (square-root of sum of
+squares). We can further try to define a grid-size independent error norm by dividing out
+the number of grid points to get a root-mean-square (RMS) error rather than an L2 norm.
+```math
+E_{\\mathrm{RMS}}(x^n) = \\sqrt{ \\frac{1}{N} \\sum_i \\left( \\frac{R(x^n)_i}{\\mathtt{rtol} \\left|x^n_i\\right| + \\mathtt{atol}} \\right)^2 }
+```
+where \$N\$ is the total number of grid points.
+
+In contrast, GMRES is constructed to minimise the L2 norm of \$r_k = b - A\\cdot x_k\$
+where GMRES is solving the linear system \$A\\cdot x = b\$, \$x_k\$ is the approximation
+to the solution \$x\$ at the \$k\$'th iteration and \$r_k\$ is the residual at the
+\$k\$'th iteration. There is no flexibility to measure error relative to \$x\$ in any
+sense. For GMRES, a 'relative tolerance' is relative to the norm of the right-hand-side
+\$b\$, which is equal to the initial residual \$r_0\$ (when no initial guess is
+given). [Where a non-zero initial guess is given it might be better to use a different
+stopping criterion, see Carson et al. section 3.8.]. The stopping criterion for the GMRES
+iteration is therefore
+```math
+\\left\\lVert r_k \\right\\rVert < \\max(\\mathtt{linear\\_rtol} \\left\\lVert r_0 \\right\\rVert, \\mathtt{linear\\_atol}) = \\max(\\mathtt{linear\\_rtol} \\left\\lVert b \\right\\rVert, \\mathtt{linear\\_atol})
+```
+As the GMRES solve is only used to get the right `direction' for the next Newton step, it
+is not necessary to have a very tight `linear_rtol` for the GMRES solve.
+"""
+function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w,
+                       nl_solver_params; left_preconditioner=nothing,
+                       right_preconditioner=nothing, coords)
+
+    rtol = nl_solver_params.rtol
+    atol = nl_solver_params.atol
+
+    distributed_norm = get_distributed_norm(coords, rtol, atol, x)
+    distributed_dot = get_distributed_dot(coords, rtol, atol, x)
+    parallel_map = get_parallel_map(coords)
+    parallel_delta_x_calc = get_parallel_delta_x_calc(coords)
+
+    residual_func!(residual, x)
+    residual_norm = distributed_norm(residual)
+    counter = 0
+    linear_counter = 0
+
+    parallel_map(()->0.0, delta_x)
+
+    close_counter = -1
+    close_linear_counter = -1
+    success = true
+    previous_residual_norm = residual_norm
+    while (counter < 1 && residual_norm > 1.0e-8) || residual_norm > 1.0
+        counter += 1
+        #println("\nNewton ", counter)
+
+        if left_preconditioner === nothing
+            left_preconditioner = identity
+        end
+        if right_preconditioner === nothing
+            right_preconditioner = identity
+        end
+
+        # Solve (approximately?):
+        #   J δx = -RHS(x)
+        parallel_map(()->0.0, delta_x)
+        linear_its = linear_solve!(x, residual_func!, residual, delta_x, v, w;
+                                   coords=coords, rtol=nl_solver_params.linear_rtol,
+                                   atol=nl_solver_params.linear_atol,
+                                   restart=nl_solver_params.linear_restart,
+                                   max_restarts=nl_solver_params.linear_max_restarts,
+                                   left_preconditioner=left_preconditioner,
+                                   right_preconditioner=right_preconditioner,
+                                   H=nl_solver_params.H, V=nl_solver_params.V,
+                                   rhs_delta=rhs_delta,
+                                   initial_guess=nl_solver_params.linear_initial_guess,
+                                   distributed_norm=distributed_norm,
+                                   distributed_dot=distributed_dot,
+                                   parallel_map=parallel_map,
+                                   parallel_delta_x_calc=parallel_delta_x_calc,
+                                   serial_solve=nl_solver_params.serial_solve)
+        linear_counter += linear_its
+
+        # If the residual does not decrease, we will do a line search to find an update
+        # that does decrease the residual. The value of `x` is used to define the
+        # normalisation value with rtol that is used to calculate the residual, so do not
+        # want to update it until the line search is completed (otherwise the norm changes
+        # during the line search, which might make it fail to converge). So calculate the
+        # updated value in the buffer `w` until the line search is completed, and only
+        # then copy it into `x`.
+        parallel_map((x) -> x, w, x)
+        parallel_map((x,delta_x) -> x + delta_x, w, x, delta_x)
+        residual_func!(residual, w)
+
+        # For the Newton iteration, we want the norm divided by the (sqrt of the) number
+        # of grid points, so we can use a tolerance that is independent of the size of the
+        # grid. This is unlike the norms needed in `linear_solve!()`.
+        residual_norm = distributed_norm(residual)
+        if isnan(residual_norm)
+            error("NaN in Newton iteration at iteration $counter")
+        end
+        if residual_norm > previous_residual_norm
+            # Do a line search between x and x+delta_x to try to find an update that does
+            # decrease residual_norm
+            s = 0.5
+            while s > 1.0e-2
+                parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x)
+                residual_func!(residual, x)
+                residual_norm = distributed_norm(residual)
+                if residual_norm ≤ previous_residual_norm
+                    break
+                end
+                s *= 0.5
+            end
+
+            if residual_norm > previous_residual_norm
+                # Failed to find a point that decreases the residual, so try a negative
+                # step
+                s = -1.0e-5
+                parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x)
+                residual_func!(residual, x)
+                residual_norm = distributed_norm(residual)
+                if residual_norm > previous_residual_norm
+                    # That didn't work either, so just take the full step and hope for
+                    # convergence later
+                    parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x)
+                    residual_func!(residual, x)
+                    residual_norm = distributed_norm(residual)
+                end
+            end
+        end
+        parallel_map((w) -> w, x, w)
+        previous_residual_norm = residual_norm
+
+        #println("Newton residual ", residual_norm, " ", linear_its, " $rtol $atol")
+
+        if residual_norm < 0.1/rtol && close_counter < 0 && close_linear_counter < 0
+            close_counter = counter
+            close_linear_counter = linear_counter
+        end
+
+        if counter > nl_solver_params.nonlinear_max_iterations
+            println("maximum iteration limit reached")
+            success = false
+            break
+        end
+    end
+    nl_solver_params.n_solves[] += 1
+    nl_solver_params.nonlinear_iterations[] += counter
+    nl_solver_params.linear_iterations[] += linear_counter
+    nl_solver_params.max_nonlinear_iterations_this_step[] =
+        max(counter, nl_solver_params.max_nonlinear_iterations_this_step[])
+#    println("Newton iterations: ", counter)
+#    println("Final residual: ", residual_norm)
+#    println("Total linear iterations: ", linear_counter)
+#    println("Linear iterations per Newton: ", linear_counter / counter)
+#
+#    println("Newton iterations after close: ", counter - close_counter)
+#    println("Total linear iterations after close: ", linear_counter - close_linear_counter)
+#    println("Linear iterations per Newton after close: ", (linear_counter - close_linear_counter) / (counter - close_counter))
+#    println()
+
+    return success
+end
+
+"""
+    get_distributed_norm(coords, rtol, atol, x)
+
+Return a 'distributed_norm' closure for arrays whose dimensions are given by the entries
+in `coords`.
+
+The implementation is chosen from `keys(coords)`: only `(:z,)`, `(:vpa,)` and
+`(:s, :r, :z, :vperp, :vpa)` are handled, any other combination raises an error. The
+returned closure forwards its arguments to the chosen implementation, supplying `rtol`,
+`atol`, `x` and `coords` as keyword arguments.
+"""
+function get_distributed_norm(coords, rtol, atol, x)
+    dims = keys(coords)
+    this_norm = if dims == (:z,)
+        distributed_norm_z
+    elseif dims == (:vpa,)
+        distributed_norm_vpa
+    elseif dims == (:s, :r, :z, :vperp, :vpa)
+        distributed_norm_s_r_z_vperp_vpa
+    else
+        error("dims=$dims is not supported yet. Need to write another "
+              * "`distributed_norm_*()` function in nonlinear_solvers.jl")
+    end
+
+    return (args...; kwargs...) -> this_norm(args...; rtol=rtol, atol=atol, x=x,
+                                             coords=coords, kwargs...)
+end
+
+"""
+    distributed_norm_z(residual; coords, rtol, atol, x)
+
+Tolerance-weighted RMS norm of the 1D array `residual` over the z-dimension.
+
+Each entry contributes `(residual[iz] / (rtol * abs(x[iz]) + atol))^2`. The local sums are
+combined with `MPI.Reduce` over `comm_block[]` and `MPI.Allreduce` over
+`comm_inter_block[]`, divided by `z.n_global`, square-rooted, and the result is broadcast
+over `comm_block[]`, so the value is returned on every process.
+"""
+function distributed_norm_z(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x)
+    z = coords.z
+
+    begin_z_region()
+
+    # On every rank except the last one in z, the point `iz == z.n` is left out of the
+    # sum (presumably because it is duplicated on the next rank - TODO confirm).
+    skip_upper_point = z.irank < z.nrank - 1
+    zend = z.n
+
+    local_norm = 0.0
+    @loop_z iz begin
+        if skip_upper_point && iz == zend
+            continue
+        end
+        scale = rtol * abs(x[iz]) + atol
+        local_norm += (residual[iz] / scale)^2
+    end
+
+    _block_synchronize()
+    block_norm = MPI.Reduce(local_norm, +, comm_block[])
+
+    global_norm = nothing
+    if block_rank[] == 0
+        summed = MPI.Allreduce(block_norm, +, comm_inter_block[])
+        global_norm = sqrt(summed / z.n_global)
+    end
+    global_norm = MPI.bcast(global_norm, comm_block[]; root=0)
+
+    return global_norm
+end
+
+"""
+    distributed_norm_vpa(residual; coords, rtol, atol, x)
+
+Tolerance-weighted RMS norm of the 1D array `residual`, computed with a plain serial
+loop: the sum of `(residual[i] / (rtol * abs(x[i]) + atol))^2`, divided by
+`length(residual)` and square-rooted. `coords` is accepted for interface compatibility
+but not used here.
+"""
+function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x)
+    # No parallelism needed when the implicit solve is over vpa - assume that this will be
+    # called inside a parallelised s_r_z_vperp loop.
+    sum_of_squares = 0.0
+    for ivpa ∈ eachindex(residual, x)
+        scale = rtol * abs(x[ivpa]) + atol
+        sum_of_squares += (residual[ivpa] / scale)^2
+    end
+
+    return sqrt(sum_of_squares / length(residual))
+end
+
+"""
+    distributed_norm_s_r_z_vperp_vpa(residual; coords, rtol, atol, x)
+
+Tolerance-weighted RMS norm of the 5D array `residual`, indexed as
+`[ivpa,ivperp,iz,ir,is]`.
+
+Each entry contributes `(residual / (rtol * abs(x) + atol))^2`. Local sums are combined
+with `MPI.Reduce` over `comm_block[]` and `MPI.Allreduce` over `comm_inter_block[]`,
+divided by the product of `coords.s` and the `n_global` of r, z, vperp and vpa,
+square-rooted, and broadcast over `comm_block[]`.
+"""
+function distributed_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5};
+                                          coords, rtol, atol, x)
+    # `coords.s` is used directly as a number (the species count)
+    n_ion_species = coords.s
+    r = coords.r
+    z = coords.z
+    vperp = coords.vperp
+    vpa = coords.vpa
+
+    begin_s_r_z_vperp_vpa_region()
+
+    # Index to leave out of the sum in r and z. On the last rank in a dimension this is
+    # set to `n + 1`, which is never equal to a loop index in 1:n, so nothing is skipped
+    # there.
+    rend = r.irank < r.nrank - 1 ? r.n : r.n + 1
+    zend = z.irank < z.nrank - 1 ? z.n : z.n + 1
+
+    local_norm = 0.0
+    @loop_s_r_z is ir iz begin
+        if ir != rend && iz != zend
+            @loop_vperp_vpa ivperp ivpa begin
+                scale = rtol * abs(x[ivpa,ivperp,iz,ir,is]) + atol
+                local_norm += (residual[ivpa,ivperp,iz,ir,is] / scale)^2
+            end
+        end
+    end
+
+    _block_synchronize()
+    block_norm = MPI.Reduce(local_norm, +, comm_block[])
+
+    global_norm = nothing
+    if block_rank[] == 0
+        summed = MPI.Allreduce(block_norm, +, comm_inter_block[])
+        global_norm = sqrt(summed / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global))
+    end
+    global_norm = MPI.bcast(global_norm, comm_block[]; root=0)
+
+    return global_norm
+end
+
+"""
+    get_distributed_dot(coords, rtol, atol, x)
+
+Get a 'distributed_dot' function that acts on arrays with dimensions given by the entries
+in `coords`.
+
+The implementation is chosen from `keys(coords)`: only `(:z,)`, `(:vpa,)` and
+`(:s, :r, :z, :vperp, :vpa)` are handled, any other combination raises an error. The
+returned closure forwards its arguments to the chosen implementation, supplying `rtol`,
+`atol`, `x` and `coords` as keyword arguments.
+"""
+function get_distributed_dot(coords, rtol, atol, x)
+    dims = keys(coords)
+    if dims == (:z,)
+        this_dot = distributed_dot_z
+    elseif dims == (:vpa,)
+        this_dot = distributed_dot_vpa
+    elseif dims == (:s, :r, :z, :vperp, :vpa)
+        this_dot = distributed_dot_s_r_z_vperp_vpa
+    else
+        error("dims=$dims is not supported yet. Need to write another "
+              * "`distributed_dot_*()` function in nonlinear_solvers.jl")
+    end
+
+    wrapped_dot = (args...; kwargs...) -> this_dot(args...; rtol=rtol, atol=atol, x=x,
+                                                   coords=coords, kwargs...)
+
+    # Return explicitly, rather than relying on the value of the trailing assignment
+    # being the implicit return value.
+    return wrapped_dot
+end
+
+"""
+    distributed_dot_z(v, w; coords, atol, rtol, x)
+
+Tolerance-weighted dot product of the 1D arrays `v` and `w` over the z-dimension.
+
+Each entry contributes `v[iz] * w[iz] / (rtol * abs(x[iz]) + atol)^2`. Local sums are
+combined with `MPI.Reduce` over `comm_block[]` and `MPI.Allreduce` over
+`comm_inter_block[]`, then divided by `z.n_global`.
+
+Unlike `distributed_norm_z`, the result is not broadcast over `comm_block[]`: processes
+with `block_rank[] != 0` return `nothing`.
+"""
+function distributed_dot_z(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1};
+                           coords, atol, rtol, x)
+
+    z = coords.z
+
+    begin_z_region()
+
+    local_dot = 0.0
+    if z.irank < z.nrank - 1
+        # Not the last rank in z: leave the point `iz == z.n` out of the sum (presumably
+        # because it is duplicated on the next rank - TODO confirm).
+        zend = z.n
+        @loop_z iz begin
+            if iz == zend
+                continue
+            end
+            local_dot += v[iz] * w[iz] / (rtol * abs(x[iz]) + atol)^2
+        end
+    else
+        @loop_z iz begin
+            local_dot += v[iz] * w[iz] / (rtol * abs(x[iz]) + atol)^2
+        end
+    end
+
+    _block_synchronize()
+    block_dot = MPI.Reduce(local_dot, +, comm_block[])
+
+    if block_rank[] == 0
+        global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[])
+        global_dot = global_dot / z.n_global
+    else
+        global_dot = nothing
+    end
+
+    return global_dot
+end
+
+"""
+    distributed_dot_vpa(v, w; coords, atol, rtol, x)
+
+Tolerance-weighted dot product of the 1D arrays `v` and `w`, computed with a plain serial
+loop: the sum of `v[i] * w[i] / (rtol * abs(x[i]) + atol)^2`, divided by `length(v)`.
+`coords` is accepted for interface compatibility but not used here.
+"""
+function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1};
+                             coords, atol, rtol, x)
+    # No parallelism needed when the implicit solve is over vpa - assume that this will be
+    # called inside a parallelised s_r_z_vperp loop.
+    weighted_sum = 0.0
+    for ivpa ∈ eachindex(v,w)
+        scale = rtol * abs(x[ivpa]) + atol
+        weighted_sum += v[ivpa] * w[ivpa] / scale^2
+    end
+    return weighted_sum / length(v)
+end
+
+"""
+    distributed_dot_s_r_z_vperp_vpa(v, w; coords, atol, rtol, x)
+
+Tolerance-weighted dot product of the 5D arrays `v` and `w`, indexed as
+`[ivpa,ivperp,iz,ir,is]`.
+
+Each entry contributes `v * w / (rtol * abs(x) + atol)^2`. Local sums are combined with
+`MPI.Reduce` over `comm_block[]` and `MPI.Allreduce` over `comm_inter_block[]`, then
+divided by the product of `coords.s` and the `n_global` of r, z, vperp and vpa.
+
+The result is not broadcast over `comm_block[]`: processes with `block_rank[] != 0`
+return `nothing`.
+"""
+function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5},
+                                         w::AbstractArray{mk_float, 5};
+                                         coords, atol, rtol, x)
+    # `coords.s` is used directly as a number (the species count)
+    n_ion_species = coords.s
+    r = coords.r
+    z = coords.z
+    vperp = coords.vperp
+    vpa = coords.vpa
+
+    begin_s_r_z_vperp_vpa_region()
+
+    # Index to leave out of the sum in r and z. On the last rank in a dimension this is
+    # set to `n + 1`, which is never equal to a loop index in 1:n, so nothing is skipped
+    # there.
+    rend = r.irank < r.nrank - 1 ? r.n : r.n + 1
+    zend = z.irank < z.nrank - 1 ? z.n : z.n + 1
+
+    local_dot = 0.0
+    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+        if ir != rend && iz != zend
+            scale = rtol * abs(x[ivpa,ivperp,iz,ir,is]) + atol
+            local_dot += v[ivpa,ivperp,iz,ir,is] * w[ivpa,ivperp,iz,ir,is] / scale^2
+        end
+    end
+
+    _block_synchronize()
+    block_dot = MPI.Reduce(local_dot, +, comm_block[])
+
+    global_dot = nothing
+    if block_rank[] == 0
+        summed = MPI.Allreduce(block_dot, +, comm_inter_block[])
+        global_dot = summed / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global)
+    end
+
+    return global_dot
+end
+
+"""
+    get_parallel_map(coords)
+
+Return the 'parallel_map' function for arrays whose dimensions are given by the entries
+in `coords`.
+
+The choice is made from `keys(coords)`: only `(:z,)`, `(:vpa,)` and
+`(:s, :r, :z, :vperp, :vpa)` are handled, any other combination raises an error.
+"""
+function get_parallel_map(coords)
+    dims = keys(coords)
+    this_map = if dims == (:z,)
+        parallel_map_z
+    elseif dims == (:vpa,)
+        parallel_map_vpa
+    elseif dims == (:s, :r, :z, :vperp, :vpa)
+        parallel_map_s_r_z_vperp_vpa
+    else
+        error("dims=$dims is not supported yet. Need to write another "
+              * "`parallel_map_*()` function in nonlinear_solvers.jl")
+    end
+    return this_map
+end
+
+# Separate versions for different numbers of arguments as generator expressions result in
+# slow code
+
+"""
+    parallel_map_z(func, result)
+    parallel_map_z(func, result, x1)
+    parallel_map_z(func, result, x1, x2)
+
+Set `result[iz]` to `func` applied to the `iz` entries of the zero, one or two input
+arrays, for every `iz` visited by `@loop_z`. Returns `nothing`.
+"""
+function parallel_map_z(func, result::AbstractArray{mk_float, 1})
+    begin_z_region()
+    @loop_z iz begin
+        result[iz] = func()
+    end
+    return nothing
+end
+function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1)
+    begin_z_region()
+    @loop_z iz begin
+        arg1 = x1[iz]
+        result[iz] = func(arg1)
+    end
+    return nothing
+end
+function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1, x2)
+    begin_z_region()
+    @loop_z iz begin
+        arg1 = x1[iz]
+        arg2 = x2[iz]
+        result[iz] = func(arg1, arg2)
+    end
+    return nothing
+end
+
+"""
+    parallel_map_vpa(func, result)
+    parallel_map_vpa(func, result, x1)
+    parallel_map_vpa(func, result, x1, x2)
+
+Set `result[i]` to `func` applied to the `i` entries of the zero, one or two input
+arrays, for every index of `result`. Returns `nothing`.
+
+These are plain serial loops. No parallelism is needed when the implicit solve is over
+vpa - assume that this will be called inside a parallelised s_r_z_vperp loop.
+"""
+function parallel_map_vpa(func, result::AbstractArray{mk_float, 1})
+    for ivpa ∈ eachindex(result)
+        result[ivpa] = func()
+    end
+    return nothing
+end
+function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1)
+    for ivpa ∈ eachindex(result)
+        arg1 = x1[ivpa]
+        result[ivpa] = func(arg1)
+    end
+    return nothing
+end
+function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1, x2)
+    for ivpa ∈ eachindex(result)
+        arg1 = x1[ivpa]
+        arg2 = x2[ivpa]
+        result[ivpa] = func(arg1, arg2)
+    end
+    return nothing
+end
+
+"""
+    parallel_map_s_r_z_vperp_vpa(func, result)
+    parallel_map_s_r_z_vperp_vpa(func, result, x1)
+    parallel_map_s_r_z_vperp_vpa(func, result, x1, x2)
+
+Set `result[ivpa,ivperp,iz,ir,is]` to `func` applied to the matching entries of the zero,
+one or two input arrays, for every index combination visited by
+`@loop_s_r_z_vperp_vpa`. Returns `nothing`.
+"""
+function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5})
+    begin_s_r_z_vperp_vpa_region()
+    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+        result[ivpa,ivperp,iz,ir,is] = func()
+    end
+    return nothing
+end
+function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1)
+    begin_s_r_z_vperp_vpa_region()
+    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+        arg1 = x1[ivpa,ivperp,iz,ir,is]
+        result[ivpa,ivperp,iz,ir,is] = func(arg1)
+    end
+    return nothing
+end
+function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1, x2)
+    begin_s_r_z_vperp_vpa_region()
+    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+        arg1 = x1[ivpa,ivperp,iz,ir,is]
+        arg2 = x2[ivpa,ivperp,iz,ir,is]
+        result[ivpa,ivperp,iz,ir,is] = func(arg1, arg2)
+    end
+    return nothing
+end
+
+"""
+    get_parallel_delta_x_calc(coords)
+
+Return the parallelised function that adds to the update `delta_x` the combination of
+the columns of the `V` matrix weighted by the minimum residual coefficients `y`.
+
+The choice is made from `keys(coords)`: only `(:z,)`, `(:vpa,)` and
+`(:s, :r, :z, :vperp, :vpa)` are handled, any other combination raises an error.
+"""
+function get_parallel_delta_x_calc(coords)
+    dims = keys(coords)
+    this_calc = if dims == (:z,)
+        parallel_delta_x_calc_z
+    elseif dims == (:vpa,)
+        parallel_delta_x_calc_vpa
+    elseif dims == (:s, :r, :z, :vperp, :vpa)
+        parallel_delta_x_calc_s_r_z_vperp_vpa
+    else
+        error("dims=$dims is not supported yet. Need to write another "
+              * "`parallel_delta_x_calc_*()` function in nonlinear_solvers.jl")
+    end
+    return this_calc
+end
+
+"""
+    parallel_delta_x_calc_z(delta_x, V, y)
+
+Add `y[iy] * V[iz,iy]`, for `iy` from 1 to `length(y)`, to `delta_x[iz]` for every `iz`
+visited by `@loop_z`. `delta_x` is accumulated into, not overwritten. Returns `nothing`.
+"""
+function parallel_delta_x_calc_z(delta_x::AbstractArray{mk_float, 1}, V, y)
+    begin_z_region()
+
+    @loop_z iz begin
+        for iy ∈ 1:length(y)
+            coefficient = y[iy]
+            delta_x[iz] += coefficient * V[iz,iy]
+        end
+    end
+
+    return nothing
+end
+
+"""
+    parallel_delta_x_calc_vpa(delta_x, V, y)
+
+Add `y[iy] * V[ivpa,iy]`, for `iy` from 1 to `length(y)`, to `delta_x[ivpa]` for every
+index of `delta_x`. `delta_x` is accumulated into, not overwritten. Returns `nothing`.
+"""
+function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y)
+    # No parallelism needed when the implicit solve is over vpa - assume that this will be
+    # called inside a parallelised s_r_z_vperp loop.
+    for ivpa ∈ eachindex(delta_x)
+        for iy ∈ 1:length(y)
+            coefficient = y[iy]
+            delta_x[ivpa] += coefficient * V[ivpa,iy]
+        end
+    end
+    return nothing
+end
+
+"""
+    parallel_delta_x_calc_s_r_z_vperp_vpa(delta_x, V, y)
+
+Add `y[iy] * V[ivpa,ivperp,iz,ir,is,iy]`, for `iy` from 1 to `length(y)`, to
+`delta_x[ivpa,ivperp,iz,ir,is]` for every index combination visited by
+`@loop_s_r_z_vperp_vpa`. `delta_x` is accumulated into, not overwritten. Returns
+`nothing`.
+"""
+function parallel_delta_x_calc_s_r_z_vperp_vpa(delta_x::AbstractArray{mk_float, 5}, V, y)
+    begin_s_r_z_vperp_vpa_region()
+
+    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+        for iy ∈ 1:length(y)
+            coefficient = y[iy]
+            delta_x[ivpa,ivperp,iz,ir,is] += coefficient * V[ivpa,ivperp,iz,ir,is,iy]
+        end
+    end
+
+    return nothing
+end
+
+"""
+    linear_solve!(x, residual_func!, residual0, delta_x, v, w; kwargs...)
+
+Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = R(x^n), which is needed
+at each step of the outer Newton iteration (in `newton_solve!()`).
+
+Jacobian-vector products are never formed explicitly; they are approximated by a finite
+difference of `residual_func!` around `x` (see the nested
+`approximate_Jacobian_vector_product!`).
+
+# Arguments
+- `x`: point about which the problem is linearised; only read here.
+- `residual_func!`: called as `residual_func!(result, point)`.
+- `residual0`: residual at `x`. Modified in place: `left_preconditioner` is applied to it.
+- `delta_x`: on entry the initial guess for the solution; on exit the updated solution
+  (the Krylov correction is added to it by `parallel_delta_x_calc`).
+- `v`, `w`: work buffers, overwritten.
+
+# Keywords
+- `rtol`, `atol`: set the GMRES tolerance `max(rtol * beta, atol)`, with `beta` the
+  initial linear residual norm.
+- `restart`: number of Krylov vectors built before a restart.
+- `max_restarts`: limit checked against the restart counter.
+- `left_preconditioner`, `right_preconditioner`: functions applied in place to an array.
+- `H`, `V`: storage for the upper Hessenberg matrix and the Krylov vectors (stored along
+  the last dimension of `V`).
+- `rhs_delta`: work buffer for the perturbed residual.
+- `initial_guess`: vector resized and used as the starting point for `fsolve`.
+- `distributed_norm`, `distributed_dot`, `parallel_map`, `parallel_delta_x_calc`:
+  dimension-specific helper functions.
+- `serial_solve`: if `true`, no MPI communication or `@serial_region` guards are used in
+  this function.
+- `coords`: not referenced directly in this function body.
+
+Returns the total number of GMRES inner iterations performed.
+"""
+function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol,
+                       restart, max_restarts, left_preconditioner, right_preconditioner,
+                       H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot,
+                       parallel_map, parallel_delta_x_calc, serial_solve)
+    # Solve (approximately?):
+    #   J δx = residual0
+    # NOTE(review): the first Krylov vector below is built from `-residual0 - J.δx`,
+    # which looks like the residual of `J δx = -residual0` - confirm the sign convention.
+
+    Jv_scale_factor = 1.0e3
+    inv_Jv_scale_factor = 1.0 / Jv_scale_factor
+
+    # The vectors `v` that are passed to this function will be normalised so that
+    # `distributed_norm(v) == 1.0`. `distributed_norm()` is defined - including the
+    # relative and absolute tolerances from the Newton iteration - so that a vector with a
+    # norm of 1.0 is 'small' in the sense that a vector with a norm of 1.0 is small enough
+    # relative to `x` to consider the iteration converged. This means that `x+v` would be
+    # very close to `x`, so R(x+v)-R(x) would be likely to be badly affected by rounding
+    # errors, because `v` is so small, relative to `x`. We actually want to multiply `v`
+    # by a large number `Jv_scale_factor` (in contrast to the small `epsilon` in the
+    # 'usual' case where the norm does not include either relative or absolute tolerance)
+    # to ensure that we get a reasonable estimate of J.v.
+    #
+    # The argument `v` (which shadows the outer buffer `v`) is overwritten in place: first
+    # with the perturbed point, then with the preconditioned finite-difference estimate of
+    # J.v, which is also returned.
+    function approximate_Jacobian_vector_product!(v)
+        right_preconditioner(v)
+
+        parallel_map((x,v) -> x + Jv_scale_factor * v, v, x, v)
+        residual_func!(rhs_delta, v)
+        parallel_map((rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor,
+                     v, rhs_delta, residual0)
+        left_preconditioner(v)
+        return v
+    end
+
+    # To start with, copy the initial guess `delta_x` into the buffer 'v', and apply the
+    # left-preconditioner to residual0 in place.
+    parallel_map((delta_x) -> delta_x, v, delta_x)
+    left_preconditioner(residual0)
+    # This function transforms the data stored in 'v' from δx to ≈J.δx
+    approximate_Jacobian_vector_product!(v)
+    # Now we actually set 'w' as the first Krylov vector, and normalise it.
+    parallel_map((residual0, v) -> -residual0 - v, w, residual0, v)
+    beta = distributed_norm(w)
+    parallel_map((w) -> w/beta, selectdim(V,ndims(V),1), w)
+
+    # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is
+    # so small that it is smaller than atol, in which case use atol instead.
+    tol = max(rtol * beta, atol)
+
+    lsq_result = nothing
+    residual = Inf
+    counter = 0
+    restart_counter = 1
+    while true
+        for i ∈ 1:restart
+            counter += 1
+            #println("Linear ", counter)
+
+            # Compute next Krylov vector
+            parallel_map((V) -> V, w, selectdim(V,ndims(V),i))
+            approximate_Jacobian_vector_product!(w)
+
+            # Gram-Schmidt orthogonalization
+            for j ∈ 1:i
+                parallel_map((V) -> V, v, selectdim(V,ndims(V),j))
+                w_dot_Vj = distributed_dot(w, v)
+                # When not doing a serial solve, `H` is only written inside a
+                # `@serial_region`.
+                if serial_solve
+                    H[j,i] = w_dot_Vj
+                else
+                    begin_serial_region()
+                    @serial_region begin
+                        H[j,i] = w_dot_Vj
+                    end
+                end
+                parallel_map((w, V) -> w - H[j,i] * V, w, w, selectdim(V,ndims(V),j))
+            end
+            norm_w = distributed_norm(w)
+            if serial_solve
+                H[i+1,i] = norm_w
+            else
+                begin_serial_region()
+                @serial_region begin
+                    H[i+1,i] = norm_w
+                end
+            end
+            # Store the normalised vector as the next Krylov vector
+            parallel_map((w) -> w / H[i+1,i], selectdim(V,ndims(V),i+1), w)
+
+            # Residual of the small least-squares problem: H[1:i+1,1:i] * guess - beta * e1
+            function temporary_residual!(result, guess)
+                #println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess))
+                result .= @view(H[1:i+1,1:i]) * guess
+                result[1] -= beta
+            end
+
+            # Second argument to fsolve needs to be a Vector{Float64}
+            if serial_solve
+                resize!(initial_guess, i)
+                initial_guess[1] = beta
+                initial_guess[2:i] .= 0.0
+                lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm)
+                residual = norm(lsq_result.f)
+            else
+                # Only the global root process solves the least-squares problem; the
+                # resulting residual is then broadcast to all processes.
+                begin_serial_region()
+                if global_rank[] == 0
+                    resize!(initial_guess, i)
+                    initial_guess[1] = beta
+                    initial_guess[2:i] .= 0.0
+                    lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm)
+                    residual = norm(lsq_result.f)
+                else
+                    residual = nothing
+                end
+                residual = MPI.bcast(residual, comm_world; root=0)
+            end
+            if residual < tol
+                break
+            end
+        end
+
+        # Get the least-squares coefficients `y`, which are used to update delta_x (the
+        # initial guess for a restart)
+        if serial_solve
+            y = lsq_result.x
+        else
+            if global_rank[] == 0
+                y = lsq_result.x
+            else
+                y = nothing
+            end
+            y = MPI.bcast(y, comm_world; root=0)
+        end
+
+        # The following is the parallelised version of
+        #    delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y))
+        parallel_delta_x_calc(delta_x, V, y)
+        right_preconditioner(delta_x)
+
+        if residual < tol || restart_counter > max_restarts
+            break
+        end
+
+        restart_counter += 1
+
+        # Store J.delta_x in the buffer v, to use it to calculate the new first
+        # Krylov vector v/beta.
+        parallel_map((delta_x) -> delta_x, v, delta_x)
+        approximate_Jacobian_vector_product!(v)
+
+        # Note residual0 has already had the left_preconditioner!() applied to it.
+        parallel_map((residual0, v) -> -residual0 - v, v, residual0, v)
+        beta = distributed_norm(v)
+        # Zero the Krylov vectors 2:length(y) before the restart
+        for i ∈ 2:length(y)
+            parallel_map(() -> 0.0, selectdim(V,ndims(V),i))
+        end
+        parallel_map((v) -> v/beta, selectdim(V,ndims(V),1), v)
+    end
+
+    return counter
+end
+
+end
diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl
index 92baf111b..d2767a90f 100644
--- a/moment_kinetics/src/runge_kutta.jl
+++ b/moment_kinetics/src/runge_kutta.jl
@@ -22,6 +22,10 @@ e.g., if f is the function to be updated, then
 f^{n+1}[stage+1] = rk_coef[1,stage]*f^{n} + rk_coef[2,stage]*f^{n+1}[stage] + rk_coef[3,stage]*(f^{n}+dt*G[f^{n+1}[stage]]
 """
 function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operators)
+
+    rk_coefs_implicit = nothing
+    implicit_coefficient_is_zero = nothing
+
     if type == "RKF5(4)"
         # Embedded 5th order / 4th order Runge-Kutta-Fehlberg method.
         # Note uses the 5th order solution for the time advance, even though the error
@@ -39,7 +43,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat
                             0    0      7296//2197   553475//233928  6591//1520  2197//836  2197//836;
                             0    0      0           -845//4104      -77//40     -56//55    -1        ;
                             0    0      0            0              -11//40      34//55     8//11    ;
-                            0    0      0            0               0           2//55     -1        ]
+                            0    0      0            0               0           2//55      0        ]
         n_rk_stages = 6
         rk_order = 5
         adaptive = true
@@ -74,7 +78,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat
                             0    0    0    0    0     0    1//6 5//6 0     0      6//5;
                             0    0    0    0    0     0    0    1//6 5//6  0     -9//5;
                             0    0    0    0    0     0    0    0    1//6  1//2   9//5;
-                            0    0    0    0    0     0    0    0    0     1//10 -1   ]
+                            0    0    0    0    0     0    0    0    0     1//10  0   ]
         n_rk_stages = 10
         rk_order = 4
         adaptive = true
@@ -87,22 +91,22 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat
     elseif type == "Fekete6(4)"
         # Fekete 6-stage 4th-order SSPRK (see comments in util/calculate_rk_coeffs.jl.
         # Note Fekete et al. recommend the 10-stage method rather than this one.
-        #rk_coeffs = mk_float[0.6447024483081 0.2386994475333264  0.5474858792272213     0.3762853856474131     0.0                -0.18132326703443313    -0.0017300417984673078;
-        #                     0.3552975516919 0.4295138541066736 -6.461498003318411e-14 -1.1871059690804486e-13 0.0                 2.9254376698872875e-14 -0.18902907903375094  ;
-        #                     0.0             0.33178669836       0.25530138316744333   -3.352873534367973e-14  0.0                 0.2059808002676668      0.2504712436879622   ;
-        #                     0.0             0.0                 0.1972127376054        0.3518900216285391     0.0                 0.4792670116241715     -0.9397479180374522   ;
-        #                     0.0             0.0                 0.0                    0.2718245927242        0.5641843457422999  9.986456106503283e-14   1.1993626679930305   ;
-        #                     0.0             0.0                 0.0                    0.0                    0.4358156542577     0.3416567872695656     -0.5310335716309745   ;
-        #                     0.0             0.0                 0.0                    0.0                    0.0                 0.1544186678729         0.2117066988196524   ]
+        #rk_coefs = mk_float[0.6447024483081 0.23869944753332645  0.5474858792272213     0.3762853856474131     -6.304828384656085e-17  -0.1813232670344333     -1.0017300417984671 ;
+        #                    0.3552975516919 0.4295138541066736  -6.460461358323626e-14 -1.1868936325049587e-13  3.608184516786869e-18   2.9392365006883485e-14 -0.18902907903375094;
+        #                    0.0             0.33178669836        0.25530138316744333   -3.3545605887402925e-14 -1.0929532856876731e-17  0.20598080026766677     0.2504712436879622 ;
+        #                    0.0             0.0                  0.1972127376054        0.3518900216285391      7.036963218665071e-17   0.47926701162417157    -0.939747918037452  ;
+        #                    0.0             0.0                  0.0                    0.2718245927242         0.5641843457422999      9.97599117309567e-14    1.1993626679930303 ;
+        #                    0.0             0.0                  0.0                    0.0                     0.4358156542577         0.34165678726956566    -0.5310335716309745 ;
+        #                    0.0             0.0                  0.0                    0.0                     0.0                     0.1544186678729         1.2117066988196523 ]
         # Might as well set to 0 the entries that look like they should be 0 apart from
         # rounding errors.
-        rk_coefs = mk_float[0.6447024483081 0.2386994475333264 0.5474858792272213  0.3762853856474131 0.0                -0.18132326703443313    -0.0017300417984673078;
-                            0.3552975516919 0.4295138541066736 0.0                 0.0                0.0                 0.0                    -0.18902907903375094  ;
-                            0.0             0.33178669836      0.25530138316744333 0.0                0.0                 0.2059808002676668      0.2504712436879622   ;
-                            0.0             0.0                0.1972127376054     0.3518900216285391 0.0                 0.4792670116241715     -0.9397479180374522   ;
-                            0.0             0.0                0.0                 0.2718245927242    0.5641843457422999  0.0                     1.1993626679930305   ;
-                            0.0             0.0                0.0                 0.0                0.4358156542577     0.3416567872695656     -0.5310335716309745   ;
-                            0.0             0.0                0.0                 0.0                0.0                 0.1544186678729         0.2117066988196524   ]
+        rk_coefs = mk_float[0.6447024483081 0.23869944753332645 0.5474858792272213  0.3762853856474131 0.0                -0.1813232670344333  -0.0017300417984673633 ;
+                            0.3552975516919 0.4295138541066736  0.0                 0.0                0.0                 0.0                 -0.18902907903375094   ;
+                            0.0             0.33178669836       0.25530138316744333 0.0                0.0                 0.20598080026766677  0.2504712436879622    ;
+                            0.0             0.0                 0.1972127376054     0.3518900216285391 0.0                 0.47926701162417157 -0.939747918037452     ;
+                            0.0             0.0                 0.0                 0.2718245927242    0.5641843457422999  0.0                  1.1993626679930303    ;
+                            0.0             0.0                 0.0                 0.0                0.4358156542577     0.34165678726956566 -0.5310335716309745    ;
+                            0.0             0.0                 0.0                 0.0                0.0                 0.1544186678729      1.2117066988196523    ]
         n_rk_stages = 6
         rk_order = 4
         adaptive = true
@@ -118,7 +122,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat
         # the addition of a truncation error estimate.
         rk_coefs = mk_float[1//2 0    2//3 0    -1//2;
                             0    1//2 1//6 1//2  1   ;
-                            1//2 1//2 1//6 1//2 -1//2]
+                            1//2 1//2 1//6 1//2  1//2]
         n_rk_stages = 4
         rk_order = 3
         adaptive = true
@@ -134,7 +138,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat
                             1//3 2//3 0    0     3//16;
                             0    1//3 2//3 0     0    ;
                             0    0    1//3 1//2  3//16;
-                            0    0    0    1//4 -1//4 ]
+                            0    0    0    1//4  3//4 ]
         n_rk_stages = 4
         rk_order = 2
         adaptive = true
@@ -144,6 +148,57 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat
         else
             CFL_prefactor = input_CFL_prefactor
         end
+    elseif type == "KennedyCarpenterARK437"
+        # 7-stage 4th-order IMEX scheme from Kennedy & Carpenter 2019
+        # (https://doi.org/10.1016/j.apnum.2018.10.007)
+        rk_coefs = mk_float[1259//2000 5290646302898597//8373961392408000 8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 -19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 
288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000;
+                            247//1000 -989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 -103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 
1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000;
+                            0 2694949928731//7487940209513 8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 
2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000;
+                            0 0 -952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 -3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 -2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000;
+                            0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721;
+                            0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 -1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150;
+                            0 0 0 0 0 11565764226357//8513123442827 -25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168;
+                            0 0 0 0 0 0 247//2000 2441//2470]
+        rk_coefs_implicit = mk_float[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 -295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 
-1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 -4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000;
+                                     0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 -205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 -6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 
-5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820;
+                                     0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 -773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 -1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164;
+                                     0 0 0 247//2000 538260754703221409274894839//225772174112649072819826640 53717436927136847537872396533404737469775216052354000//7927277355078818621920036006972820247636839819935349 -1772830339659539491048538392985299701647037422231103660763419873677962619701225032539957600128007666810800//15721979495129036484463918711394582943772656542660620350548585102102545643714934169988943899054128147359 23300798642481300915550244810330608079712179111290003575082920574543585477524931007510835130377963055801601626441651388811757948939347006283700//721721356793285693541681937242605189550706405472415761047580279465244653002939927725991664966944137182650520997040379092802938837804294847283 7898475573277855254317959379657908427305225639249224806096111567367259892230833277244398214475361481979103570897581069354516256084225361210253104267762350814757343118967653292590//10295136532244819865134216418581425896312845076778105967882093451812315373150919833368240666957561175670654409595846292949450822030580980616104277250108095432715677497120580742213;
+                                     0 0 0 0 247//2000 109149106916529224225613938235//31652606811075124885796735456 -1471402139169815526549951787477624798736224552941321200//56440086799582050350922834838054057420026494337598863 5412580004078613161687981458637008030233366457113868199883389551037522158772900//455213269145024092215621169612878312797315768577504391565569423807316058315843 3820395190970937465086284368007694267392689261092114936550556797062450867925288189416331303649552143230//14273858093118406255740963176533941671573423886510381570104226394395061792966349825149357548274393367193;
+                                     0 0 0 0 0 247//2000 3702251939282354375344210899//4399061605898178118260737311 7335745460336671146051037364344432719375//4167718263865310401834285721625989602073 11934751738672605596266458500552561610251142174626625//266467933294598164559149808395021381193202692438810102;
+                                     0 0 0 0 0 0 247//2000 1753//2000 0]
+        implicit_coefficient_is_zero = Bool[true, false, false, false, false, false, false]
+        n_rk_stages = 7
+        rk_order = 4
+        adaptive = true
+        low_storage = false
+        if input_CFL_prefactor ≤ 0.0
+            CFL_prefactor = 4.0
+        else
+            CFL_prefactor = input_CFL_prefactor
+        end
+    elseif type == "KennedyCarpenterARK324"
+        # 4-stage 3rd-order IMEX scheme from Kennedy & Carpenter 2003
+        # (https://doi.org/10.1016/S0168-9274(02)00138-1,
+        # https://ntrs.nasa.gov/api/citations/20010075154/downloads/20010075154.pdf)
+        rk_coefs = mk_float[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 -674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800;
+                            1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700;
+                            0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 -99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925;
+                            0 0 10755448449292//10357097424841 -2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300;
+                            0 0 0 1767732205903//4055673282236 2223734833661311464443869//2412892370833855116699825]
+        rk_coefs_implicit = mk_float[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 -1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200;
+                                     0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 -13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425;
+                                     0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225;
+                                     0 0 0 1767732205903//4055673282236 2287941076333//4055673282236 0]
+        implicit_coefficient_is_zero = Bool[true, false, false, false]
+        n_rk_stages = 4
+        rk_order = 3
+        adaptive = true
+        low_storage = false
+        if input_CFL_prefactor ≤ 0.0
+            CFL_prefactor = 4.0
+        else
+            CFL_prefactor = input_CFL_prefactor
+        end
     elseif type == "SSPRK4"
         n_rk_stages = 4
         rk_coefs = allocate_float(3, n_rk_stages)
@@ -197,35 +252,71 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat
         CFL_prefactor = NaN
     else
         error("Unsupported RK timestep method, type=$type\n"
-              * "Valid methods are: SSPRK4, SSPRK3, SSPRK2, SSPRK1, RKF5(4), Fekete10(4),"
-              * "Fekete6(4), Fekete4(3), Fekete4(2)")
+              * "Valid methods are: SSPRK4, SSPRK3, SSPRK2, SSPRK1, RKF5(4), Fekete10(4), "
+              * "Fekete6(4), Fekete4(3), Fekete4(2), KennedyCarpenterARK437, "
+              * "KennedyCarpenterARK324")
     end
 
     if split_operators && adaptive
         error("Adaptive timestepping not supported with operator splitting")
     end
 
-    return rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor
+    # Sanity check coefficient array sizes; `adaptive` (a Bool) adds one column when true
+    if low_storage
+        correct_size = (3, n_rk_stages + adaptive)
+        if size(rk_coefs) != correct_size
+            error("Size of rk_coefs, $(size(rk_coefs)) is not "
+                  * "(3, n_rk_stages+adaptive)=$correct_size")
+        end
+
+        correct_size_implicit = (3, n_rk_stages + 1 + adaptive)
+        if rk_coefs_implicit !== nothing && size(rk_coefs_implicit) != correct_size_implicit
+            error("Size of rk_coefs_implicit, $(size(rk_coefs_implicit)) is not "
+                  * "(3, n_rk_stages+1+adaptive)=$correct_size_implicit")
+        end
+    else
+        correct_size = (n_rk_stages + 1, n_rk_stages + adaptive)
+        if size(rk_coefs) != correct_size
+            error("Size of rk_coefs, $(size(rk_coefs)) is not "
+                  * "(n_rk_stages+1, n_rk_stages+adaptive)=$correct_size")
+        end
+
+        correct_size_implicit = (n_rk_stages, n_rk_stages + 1 + adaptive)
+        if rk_coefs_implicit !== nothing && size(rk_coefs_implicit) != correct_size_implicit
+            error("Size of rk_coefs_implicit, $(size(rk_coefs_implicit)) is not "
+                  * "(n_rk_stages, n_rk_stages+1+adaptive)=$correct_size_implicit")
+        end
+    end
+
+    correct_size = (n_rk_stages,)
+    if implicit_coefficient_is_zero !== nothing &&
+            size(implicit_coefficient_is_zero) != correct_size
+        error("Size of implicit_coefficient_is_zero, $(size(implicit_coefficient_is_zero)) "
+              * "is not (n_rk_stages,)=$correct_size")
+    end
+
+    return rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages,
+           rk_order, adaptive, low_storage, CFL_prefactor
 end
 
 """
 use Runge Kutta to update any ion velocity moments evolved separately from
 the pdf
 """
-function rk_update_evolved_moments!(scratch, moments, t_params, istage)
+function rk_update_evolved_moments!(scratch, scratch_implicit, moments, t_params, istage)
     # if separately evolving the particle density, update using RK
     if moments.evolve_density
-        rk_update_variable!(scratch, :density, t_params, istage)
+        rk_update_variable!(scratch, scratch_implicit, :density, t_params, istage)
     end
 
     # if separately evolving the parallel flow, update using RK
     if moments.evolve_upar
-        rk_update_variable!(scratch, :upar, t_params, istage)
+        rk_update_variable!(scratch, scratch_implicit, :upar, t_params, istage)
     end
 
     # if separately evolving the parallel pressure, update using RK;
     if moments.evolve_ppar
-        rk_update_variable!(scratch, :ppar, t_params, istage)
+        rk_update_variable!(scratch, scratch_implicit, :ppar, t_params, istage)
     end
 end
 
@@ -233,30 +324,35 @@ end
 use Runge Kutta to update any electron velocity moments evolved separately from
 the pdf
 """
-function rk_update_evolved_moments_electron!(scratch, moments, t_params, istage)
+function rk_update_evolved_moments_electron!(scratch, scratch_implicit, moments, t_params,
+                                             istage)
     # For now, electrons always fully moment kinetic, and ppar is the only evolving moment
     # (density and upar are calculated from quasineutrality and ambipolarity constraints).
-    rk_update_variable!(scratch, :ppar_electron, t_params, istage)
+    rk_update_variable!(scratch, scratch_implicit, :ppar_electron, t_params, istage)
 end
 
 """
 use Runge Kutta to update any neutral-particle velocity moments evolved separately from
 the pdf
 """
-function rk_update_evolved_moments_neutral!(scratch, moments, t_params, istage)
+function rk_update_evolved_moments_neutral!(scratch, scratch_implicit, moments, t_params,
+                                            istage)
     # if separately evolving the particle density, update using RK
     if moments.evolve_density
-        rk_update_variable!(scratch, :density_neutral, t_params, istage; neutrals=true)
+        rk_update_variable!(scratch, scratch_implicit, :density_neutral, t_params, istage;
+                            neutrals=true)
     end
 
     # if separately evolving the parallel flow, update using RK
     if moments.evolve_upar
-        rk_update_variable!(scratch, :uz_neutral, t_params, istage; neutrals=true)
+        rk_update_variable!(scratch, scratch_implicit, :uz_neutral, t_params, istage;
+                            neutrals=true)
     end
 
     # if separately evolving the parallel pressure, update using RK;
     if moments.evolve_ppar
-        rk_update_variable!(scratch, :pz_neutral, t_params, istage; neutrals=true)
+        rk_update_variable!(scratch, scratch_implicit, :pz_neutral, t_params, istage;
+                            neutrals=true)
     end
 end
 
@@ -264,29 +360,52 @@ end
 Update the variable named `var_symbol` in `scratch` to the current Runge-Kutta stage
 `istage`. The current value in `scratch[istage+1]` is the result of the forward-Euler
 update, which needs to be corrected using values from previous stages with the Runge-Kutta
-coefficients.
+coefficients. `scratch_implicit` contains the results of backward-Euler updates, which are
+needed for IMEX timestepping schemes.
 """
-function rk_update_variable!(scratch, var_symbol::Symbol, t_params, istage; neutrals=false)
+function rk_update_variable!(scratch, scratch_implicit, var_symbol::Symbol, t_params,
+                             istage; neutrals=false)
     if t_params.low_storage
         var_arrays = (getfield(scratch[istage+1], var_symbol),
                       getfield(scratch[istage], var_symbol),
                       getfield(scratch[1], var_symbol))
+        if scratch_implicit === nothing
+            var_arrays_implicit = (nothing, nothing, nothing)
+        else
+            var_arrays_implicit = (getfield(scratch_implicit[istage+1], var_symbol),
+                                   getfield(scratch_implicit[istage], var_symbol),
+                                   getfield(scratch_implicit[1], var_symbol))
+        end
     else
         var_arrays = Tuple(getfield(scratch[i], var_symbol) for i ∈ 1:istage+1)
+        if scratch_implicit === nothing
+            var_arrays_implicit = nothing
+        else
+            var_arrays_implicit = Tuple(getfield(scratch_implicit[i], var_symbol)
+                                        for i ∈ 1:istage)
+        end
     end
     rk_coefs = @view t_params.rk_coefs[:,istage]
+    if t_params.rk_coefs_implicit === nothing
+        rk_coefs_implicit = nothing
+    else
+        rk_coefs_implicit = @view t_params.rk_coefs_implicit[:,istage+1]
+    end
 
     if neutrals
         if t_params.low_storage
-            rk_update_loop_neutrals_low_storage!(rk_coefs, var_arrays...)
+            rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit,
+                                                 var_arrays..., var_arrays_implicit...)
         else
-            rk_update_loop_neutrals!(rk_coefs, var_arrays)
+            rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit, var_arrays,
+                                     var_arrays_implicit)
         end
     else
         if t_params.low_storage
-            rk_update_loop_low_storage!(rk_coefs, var_arrays...)
+            rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, var_arrays...,
+                                        var_arrays_implicit...)
         else
-            rk_update_loop!(rk_coefs, var_arrays)
+            rk_update_loop!(rk_coefs, rk_coefs_implicit, var_arrays, var_arrays_implicit)
         end
     end
 
@@ -294,44 +413,67 @@ function rk_update_variable!(scratch, var_symbol::Symbol, t_params, istage; neut
 end
 
 """
-Calculate the estimated truncation error for the variable named `var_symbol`, for adaptive
-timestepping methods.
+Calculate a lower-order approximation for the variable named `var_symbol`, which can be
+used to calculate an error estimate for adaptive timestepping methods.
 
-The calculated error is stored in `var_symbol` in `scratch[2]` (as this entry should not
-be needed again after the error is calculated).
+The lower-order approximation is stored in `var_symbol` in `scratch[2]` (as this entry
+should not be needed again after the lower-order approximation is calculated).
 """
-function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=false)
+function rk_loworder_solution!(scratch, scratch_implicit, var_symbol::Symbol, t_params;
+                               neutrals=false)
     if !t_params.adaptive
-        error("rk_error_variable!() should only be called when using adaptive "
+        error("rk_loworder_solution!() should only be called when using adaptive "
               * "timestepping")
     end
     if t_params.low_storage
         var_arrays = (getfield(scratch[end], var_symbol),
                       getfield(scratch[end-1], var_symbol),
                       getfield(scratch[1], var_symbol))
+        if scratch_implicit === nothing
+            var_arrays_implicit = (nothing, nothing, nothing)
+        else
+            var_arrays_implicit = (getfield(scratch_implicit[end], var_symbol),
+                                   getfield(scratch_implicit[end-1], var_symbol),
+                                   getfield(scratch_implicit[1], var_symbol))
+        end
     else
         var_arrays = Tuple(getfield(scratch[i], var_symbol) for i ∈ 1:length(scratch))
+        if scratch_implicit === nothing
+            var_arrays_implicit = nothing
+        else
+            var_arrays_implicit = Tuple(getfield(scratch_implicit[i], var_symbol)
+                                        for i ∈ 1:length(scratch_implicit))
+        end
     end
 
-    error_coefs = @view t_params.rk_coefs[:,end]
+    loworder_coefs = @view t_params.rk_coefs[:,end]
+    if t_params.rk_coefs_implicit === nothing
+        loworder_coefs_implicit = nothing
+    else
+        loworder_coefs_implicit = @view t_params.rk_coefs_implicit[:,end]
+    end
 
     # The second element of `scratch` is not needed any more for the RK update, so we can
-    # overwrite it with the error estimate.
+    # overwrite it with the lower-order approximation.
     output = getfield(scratch[2], var_symbol)
 
     if neutrals
         if t_params.low_storage
-            rk_update_loop_neutrals_low_storage!(error_coefs, var_arrays...;
+            rk_update_loop_neutrals_low_storage!(loworder_coefs, loworder_coefs_implicit,
+                                                 var_arrays..., var_arrays_implicit...;
                                                  output=output)
         else
-            rk_update_loop_neutrals!(error_coefs, var_arrays; output=output)
+            rk_update_loop_neutrals!(loworder_coefs, loworder_coefs_implicit, var_arrays,
+                                     var_arrays_implicit; output=output)
         end
     else
         if t_params.low_storage
-            rk_update_loop_low_storage!(error_coefs, var_arrays...;
+            rk_update_loop_low_storage!(loworder_coefs, loworder_coefs_implicit,
+                                        var_arrays..., var_arrays_implicit...;
                                         output=output)
         else
-            rk_update_loop!(error_coefs, var_arrays; output=output)
+            rk_update_loop!(loworder_coefs, loworder_coefs_implicit, var_arrays,
+                            var_arrays_implicit; output=output)
         end
     end
 
@@ -339,172 +481,295 @@ function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=fals
 end
 
 # Ion distribution function
-function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,5},
+function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit,
+                                     new::AbstractArray{mk_float,5},
                                      old::AbstractArray{mk_float,5},
-                                     first::AbstractArray{mk_float,5}; output=new)
+                                     first::AbstractArray{mk_float,5}, new_implicit,
+                                     old_implicit, first_implicit; output=new)
     @boundscheck length(rk_coefs) == 3
 
     begin_s_r_z_vperp_vpa_region()
-    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
-        output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] +
-                                       rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] +
-                                       rk_coefs[3]*new[ivpa,ivperp,iz,ir,is]
+    if rk_coefs_implicit === nothing
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] +
+                                           rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] +
+                                           rk_coefs[3]*new[ivpa,ivperp,iz,ir,is]
+        end
+    else
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] +
+                                           rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] +
+                                           rk_coefs[3]*new[ivpa,ivperp,iz,ir,is] +
+                                           rk_coefs_implicit[1]*first_implicit[ivpa,ivperp,iz,ir,is] +
+                                           rk_coefs_implicit[2]*old_implicit[ivpa,ivperp,iz,ir,is]
+        end
     end
 
     return nothing
 end
-function rk_update_loop!(rk_coefs,
-                         var_arrays::NTuple{N,AbstractArray{mk_float,5}};
-                         output=var_arrays[N]) where N
+function rk_update_loop!(rk_coefs, rk_coefs_implicit,
+                         var_arrays::NTuple{N,AbstractArray{mk_float,5}},
+                         var_arrays_implicit; output=var_arrays[N]) where N
     @boundscheck length(rk_coefs) ≥ N
 
     begin_s_r_z_vperp_vpa_region()
-    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
-        output[ivpa,ivperp,iz,ir,is] =
-            sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N)
+    if rk_coefs_implicit === nothing
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir,is] =
+                sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N)
+        end
+    else
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir,is] =
+                sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N) +
+                sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N-1)
+        end
     end
 
     return nothing
 end
 
 # Ion moments
-function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,3},
+function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit,
+                                     new::AbstractArray{mk_float,3},
                                      old::AbstractArray{mk_float,3},
-                                     first::AbstractArray{mk_float,3}; output=new)
+                                     first::AbstractArray{mk_float,3}, new_implicit,
+                                     old_implicit, first_implicit; output=new)
     @boundscheck length(rk_coefs) == 3
 
     begin_s_r_z_region()
-    @loop_s_r_z is ir iz begin
-        output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] +
-                           rk_coefs[2]*old[iz,ir,is] +
-                           rk_coefs[3]*new[iz,ir,is]
+    if rk_coefs_implicit === nothing
+        @loop_s_r_z is ir iz begin
+            output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] +
+                               rk_coefs[2]*old[iz,ir,is] +
+                               rk_coefs[3]*new[iz,ir,is]
+        end
+    else
+        @loop_s_r_z is ir iz begin
+            output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] +
+                               rk_coefs[2]*old[iz,ir,is] +
+                               rk_coefs[3]*new[iz,ir,is] +
+                               rk_coefs_implicit[1]*first_implicit[iz,ir,is] +
+                               rk_coefs_implicit[2]*old_implicit[iz,ir,is]
+        end
     end
 
     return nothing
 end
-function rk_update_loop!(rk_coefs,
-                         var_arrays::NTuple{N,AbstractArray{mk_float,3}};
-                         output=var_arrays[N]) where N
+function rk_update_loop!(rk_coefs, rk_coefs_implicit,
+                         var_arrays::NTuple{N,AbstractArray{mk_float,3}},
+                         var_arrays_implicit; output=var_arrays[N]) where N
     @boundscheck length(rk_coefs) ≥ N
 
     begin_s_r_z_region()
-    @loop_s_r_z is ir iz begin
-        output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N)
+    if rk_coefs_implicit === nothing
+        @loop_s_r_z is ir iz begin
+            output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N)
+        end
+    else
+        @loop_s_r_z is ir iz begin
+            output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N) +
+                               sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir,is] for i ∈ 1:N-1)
+        end
     end
 
     return nothing
 end
 
 # Electron distribution function
-function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,4},
+function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit,
+                                     new::AbstractArray{mk_float,4},
                                      old::AbstractArray{mk_float,4},
-                                     first::AbstractArray{mk_float,4}; output=new)
+                                     first::AbstractArray{mk_float,4}, new_implicit,
+                                     old_implicit, first_implicit; output=new)
     @boundscheck length(rk_coefs) == 3
 
     begin_r_z_vperp_vpa_region()
-    @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
-        output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] +
-                                    rk_coefs[2]*old[ivpa,ivperp,iz,ir] +
-                                    rk_coefs[3]*new[ivpa,ivperp,iz,ir]
+    if rk_coefs_implicit === nothing
+        @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] +
+                                        rk_coefs[2]*old[ivpa,ivperp,iz,ir] +
+                                        rk_coefs[3]*new[ivpa,ivperp,iz,ir]
+        end
+    else
+        @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] +
+                                        rk_coefs[2]*old[ivpa,ivperp,iz,ir] +
+                                        rk_coefs[3]*new[ivpa,ivperp,iz,ir] +
+                                        rk_coefs_implicit[1]*first_implicit[ivpa,ivperp,iz,ir] +
+                                        rk_coefs_implicit[2]*old_implicit[ivpa,ivperp,iz,ir]
+        end
     end
 
     return nothing
 end
-function rk_update_loop!(rk_coefs,
-                         var_arrays::NTuple{N,AbstractArray{mk_float,4}};
-                         output=var_arrays[N]) where N
+function rk_update_loop!(rk_coefs, rk_coefs_implicit,
+                         var_arrays::NTuple{N,AbstractArray{mk_float,4}},
+                         var_arrays_implicit; output=var_arrays[N]) where N
     @boundscheck length(rk_coefs) ≥ N
 
     begin_r_z_vperp_vpa_region()
-    @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
-        output[ivpa,ivperp,iz,ir] =
-            sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N)
+    if rk_coefs_implicit === nothing
+        @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir] =
+                sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N)
+        end
+    else
+        @loop_r_z_vperp_vpa ir iz ivperp ivpa begin
+            output[ivpa,ivperp,iz,ir] =
+                sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N) +
+                sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivpa,ivperp,iz,ir]
+                    for i ∈ 1:N-1)
+        end
     end
 
     return nothing
 end
 
 # Electron moments
-function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,2},
+function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit,
+                                     new::AbstractArray{mk_float,2},
                                      old::AbstractArray{mk_float,2},
-                                     first::AbstractArray{mk_float,2}; output=new)
+                                     first::AbstractArray{mk_float,2}, new_implicit,
+                                     old_implicit, first_implicit; output=new)
     @boundscheck length(rk_coefs) == 3
 
     begin_r_z_region()
-    @loop_r_z ir iz begin
-        output[iz,ir] = rk_coefs[1]*first[iz,ir] +
-                        rk_coefs[2]*old[iz,ir] +
-                        rk_coefs[3]*new[iz,ir]
+    if rk_coefs_implicit === nothing
+        @loop_r_z ir iz begin
+            output[iz,ir] = rk_coefs[1]*first[iz,ir] +
+                            rk_coefs[2]*old[iz,ir] +
+                            rk_coefs[3]*new[iz,ir]
+        end
+    else
+        @loop_r_z ir iz begin
+            output[iz,ir] = rk_coefs[1]*first[iz,ir] +
+                            rk_coefs[2]*old[iz,ir] +
+                            rk_coefs[3]*new[iz,ir] +
+                            rk_coefs_implicit[1]*first_implicit[iz,ir] +
+                            rk_coefs_implicit[2]*old_implicit[iz,ir]
+        end
     end
 
     return nothing
 end
-function rk_update_loop!(rk_coefs,
-                         var_arrays::NTuple{N,AbstractArray{mk_float,2}};
+function rk_update_loop!(rk_coefs, rk_coefs_implicit,
+                         var_arrays::NTuple{N,AbstractArray{mk_float,2}},
+                         var_arrays_implicit;
                          output=var_arrays[N]) where N
     @boundscheck length(rk_coefs) ≥ N
 
     begin_r_z_region()
-    @loop_r_z ir iz begin
-        output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N)
+    if rk_coefs_implicit === nothing
+        @loop_r_z ir iz begin
+            output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N)
+        end
+    else
+        @loop_r_z ir iz begin
+            output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N) +
+                            sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir]
+                                for i ∈ 1:N-1)
+        end
     end
 
     return nothing
 end
 
 # Neutral distribution function
-function rk_update_loop_neutrals_low_storage!(rk_coefs, new::AbstractArray{mk_float,6},
-                                     old::AbstractArray{mk_float,6},
-                                     first::AbstractArray{mk_float,6}; output=new)
+function rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit,
+                                              new::AbstractArray{mk_float,6},
+                                              old::AbstractArray{mk_float,6},
+                                              first::AbstractArray{mk_float,6},
+                                              new_implicit, old_implicit, first_implicit;
+                                              output=new)
     @boundscheck length(rk_coefs) == 3
 
     begin_sn_r_z_vzeta_vr_vz_region()
-    @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
-        output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] +
-                                           rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] +
-                                           rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn]
+    if rk_coefs_implicit === nothing
+        @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
+            output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] +
+                                               rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] +
+                                               rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn]
+        end
+    else
+        @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
+            output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] +
+                                               rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] +
+                                               rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn] +
+                                               rk_coefs_implicit[1]*first_implicit[ivz,ivr,ivzeta,iz,ir,isn] +
+                                               rk_coefs_implicit[2]*old_implicit[ivz,ivr,ivzeta,iz,ir,isn]
+        end
     end
 
     return nothing
 end
-function rk_update_loop_neutrals!(rk_coefs,
-                                  var_arrays::NTuple{N,AbstractArray{mk_float,6}};
-                                  output=var_arrays[N]) where N
+function rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit,
+                                  var_arrays::NTuple{N,AbstractArray{mk_float,6}},
+                                  var_arrays_implicit; output=var_arrays[N]) where N
     @boundscheck length(rk_coefs) ≥ N
 
     begin_sn_r_z_vzeta_vr_vz_region()
-    @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
-        output[ivz,ivr,ivzeta,iz,ir,isn] =
-            sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N)
+    if rk_coefs_implicit === nothing
+        @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
+            output[ivz,ivr,ivzeta,iz,ir,isn] =
+                sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N)
+        end
+    else
+        @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
+            output[ivz,ivr,ivzeta,iz,ir,isn] =
+                sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N) +
+                sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivz,ivr,ivzeta,iz,ir,isn]
+                    for i ∈ 1:N-1)
+        end
     end
 
     return nothing
 end
 
 # Neutral moments
-function rk_update_loop_neutrals_low_storage!(rk_coefs, new::AbstractArray{mk_float,3},
+function rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit,
+                                              new::AbstractArray{mk_float,3},
                                               old::AbstractArray{mk_float,3},
-                                              first::AbstractArray{mk_float,3};
+                                              first::AbstractArray{mk_float,3},
+                                              new_implicit, old_implicit, first_implicit;
                                               output=new)
     @boundscheck length(rk_coefs) == 3
 
     begin_sn_r_z_region()
-    @loop_sn_r_z isn ir iz begin
-        output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] +
-                            rk_coefs[2]*old[iz,ir,isn] +
-                            rk_coefs[3]*new[iz,ir,isn]
+    if rk_coefs_implicit === nothing
+        @loop_sn_r_z isn ir iz begin
+            output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] +
+                                rk_coefs[2]*old[iz,ir,isn] +
+                                rk_coefs[3]*new[iz,ir,isn]
+        end
+    else
+        @loop_sn_r_z isn ir iz begin
+            output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] +
+                                rk_coefs[2]*old[iz,ir,isn] +
+                                rk_coefs[3]*new[iz,ir,isn] +
+                                rk_coefs_implicit[1]*first_implicit[iz,ir,isn] +
+                                rk_coefs_implicit[2]*old_implicit[iz,ir,isn]
+        end
     end
 
     return nothing
 end
-function rk_update_loop_neutrals!(rk_coefs,
-                                  var_arrays::NTuple{N,AbstractArray{mk_float,3}};
-                                  output=var_arrays[N]) where N
+function rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit,
+                                  var_arrays::NTuple{N,AbstractArray{mk_float,3}},
+                                  var_arrays_implicit; output=var_arrays[N]) where N
     @boundscheck length(rk_coefs) ≥ N
 
     begin_sn_r_z_region()
-    @loop_sn_r_z isn ir iz begin
-        output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N)
+    if rk_coefs_implicit === nothing
+        @loop_sn_r_z isn ir iz begin
+            output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N)
+        end
+    else
+        @loop_sn_r_z isn ir iz begin
+            output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N) +
+                                sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir,isn]
+                                    for i ∈ 1:N-1)
+        end
     end
 
     return nothing
@@ -542,13 +807,13 @@ be known at compile time, allowing this function to be efficient.
 """
 function local_error_norm end
 
-function local_error_norm(error::MPISharedArray{mk_float,2},
+function local_error_norm(f_loworder::MPISharedArray{mk_float,2},
                           f::MPISharedArray{mk_float,2}, rtol, atol; method="Linf",
                           skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0)
     if method == "Linf"
         f_max = -Inf
         @loop_r_z ir iz begin
-            error_norm = abs(error[iz,ir]) / (rtol*abs(f[iz,ir]) + atol)
+            error_norm = abs(f_loworder[iz,ir] - f[iz,ir]) / (rtol*abs(f[iz,ir]) + atol)
             f_max = max(f_max, error_norm)
         end
         return f_max
@@ -558,12 +823,12 @@ function local_error_norm(error::MPISharedArray{mk_float,2},
             if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1)
                 continue
             end
-            error_norm = (error[iz,ir] / (rtol*abs(f[iz,ir]) + atol))^2
+            error_norm = ((f_loworder[iz,ir] - f[iz,ir]) / (rtol*abs(f[iz,ir]) + atol))^2
             L2sum += error_norm
         end
         # Will sum results from different processes in shared memory block after returning
         # from this function.
-        nz, nr = size(error)
+        nz, nr = size(f_loworder)
         if skip_r_inner
             nr -= 1
         end
@@ -575,7 +840,7 @@ function local_error_norm(error::MPISharedArray{mk_float,2},
         error("Unrecognized method '$method'")
     end
 end
-function local_error_norm(error::MPISharedArray{mk_float,3},
+function local_error_norm(f_loworder::MPISharedArray{mk_float,3},
                           f::MPISharedArray{mk_float,3}, rtol, atol, neutral=false;
                           method="Linf", skip_r_inner=false, skip_z_lower=false,
                           error_sum_zero=0.0)
@@ -583,12 +848,12 @@ function local_error_norm(error::MPISharedArray{mk_float,3},
         f_max = -Inf
         if neutral
             @loop_sn_r_z isn ir iz begin
-                error_norm = abs(error[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol)
+                error_norm = abs(f_loworder[iz,ir,isn] - f[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol)
                 f_max = max(f_max, error_norm)
             end
         else
             @loop_s_r_z is ir iz begin
-                error_norm = abs(error[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol)
+                error_norm = abs(f_loworder[iz,ir,is] - f[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol)
                 f_max = max(f_max, error_norm)
             end
         end
@@ -600,7 +865,7 @@ function local_error_norm(error::MPISharedArray{mk_float,3},
                 if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1)
                     continue
                 end
-                error_norm = (error[iz,ir,isn] / (rtol*abs(f[iz,ir,isn]) + atol))^2
+                error_norm = ((f_loworder[iz,ir,isn] - f[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol))^2
                 L2sum += error_norm
             end
         else
@@ -608,13 +873,13 @@ function local_error_norm(error::MPISharedArray{mk_float,3},
                 if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1)
                     continue
                 end
-                error_norm = (error[iz,ir,is] / (rtol*abs(f[iz,ir,is]) + atol))^2
+                error_norm = ((f_loworder[iz,ir,is] - f[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol))^2
                 L2sum += error_norm
             end
         end
         # Will sum results from different processes in shared memory block after returning
         # from this function.
-        nz, nr, nspecies = size(error)
+        nz, nr, nspecies = size(f_loworder)
         if skip_r_inner
             nr -= 1
         end
@@ -626,13 +891,13 @@ function local_error_norm(error::MPISharedArray{mk_float,3},
         error("Unrecognized method '$method'")
     end
 end
-function local_error_norm(error::MPISharedArray{mk_float,5},
+function local_error_norm(f_loworder::MPISharedArray{mk_float,5},
                           f::MPISharedArray{mk_float,5}, rtol, atol; method="Linf",
                           skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0)
     if method == "Linf"
         f_max = -Inf
         @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
-            error_norm = abs(error[ivpa,ivperp,iz,ir,is]) /
+            error_norm = abs(f_loworder[ivpa,ivperp,iz,ir,is] - f[ivpa,ivperp,iz,ir,is]) /
                          (rtol*abs(f[ivpa,ivperp,iz,ir,is]) + atol)
             f_max = max(f_max, error_norm)
         end
@@ -643,13 +908,13 @@ function local_error_norm(error::MPISharedArray{mk_float,5},
             if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1)
                 continue
             end
-            error_norm = (error[ivpa,ivperp,iz,ir,is] /
+            error_norm = ((f_loworder[ivpa,ivperp,iz,ir,is] - f[ivpa,ivperp,iz,ir,is]) /
                           (rtol*abs(f[ivpa,ivperp,iz,ir,is]) + atol))^2
             L2sum += error_norm
         end
         # Will sum results from different processes in shared memory block after returning
         # from this function.
-        nvpa, nvperp, nz, nr, nspecies = size(error)
+        nvpa, nvperp, nz, nr, nspecies = size(f_loworder)
         if skip_r_inner
             nr -= 1
         end
@@ -661,13 +926,13 @@ function local_error_norm(error::MPISharedArray{mk_float,5},
         error("Unrecognized method '$method'")
     end
 end
-function local_error_norm(error::MPISharedArray{mk_float,6},
+function local_error_norm(f_loworder::MPISharedArray{mk_float,6},
                           f::MPISharedArray{mk_float,6}, rtol, atol; method="Linf",
                           skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0)
     if method == "Linf"
         f_max = -Inf
         @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin
-            error_norm = abs(error[ivz,ivr,ivzeta,iz,ir,isn]) /
+            error_norm = abs(f_loworder[ivz,ivr,ivzeta,iz,ir,isn] - f[ivz,ivr,ivzeta,iz,ir,isn]) /
                          (rtol*abs(f[ivz,ivr,ivzeta,iz,ir,isn]) + atol)
             f_max = max(f_max, error_norm)
         end
@@ -678,7 +943,7 @@ function local_error_norm(error::MPISharedArray{mk_float,6},
             if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1)
                 continue
             end
-            error_norm = (error[ivz,ivr,ivzeta,iz,ir,isn] /
+            error_norm = ((f_loworder[ivz,ivr,ivzeta,iz,ir,isn] - f[ivz,ivr,ivzeta,iz,ir,isn]) /
                           (rtol*abs(f[ivz,ivr,ivzeta,iz,ir,isn]) + atol))^2
             L2sum += error_norm
         end
@@ -692,12 +957,14 @@ end
 
 """
-    adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
-                                       total_points, current_dt, error_norm_method)
+    adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms,
+                                       total_points, current_dt, error_norm_method,
+                                       success, nl_max_its_fraction)
 
 Use the calculated `CFL_limits` and `error_norms` to update the timestep in `t_params`.
 """
 function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms,
-                                            total_points, current_dt, error_norm_method)
+                                            total_points, current_dt, error_norm_method,
+                                            success, nl_max_its_fraction)
     # Get global minimum of CFL limits
     CFL_limit = nothing
     this_limit_caused_by = nothing
@@ -706,10 +973,10 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
         CFL_limits = MPI.Allreduce(CFL_limits, min, comm_inter_block[])
         CFL_limit_caused_by = argmin(CFL_limits)
         CFL_limit = CFL_limits[CFL_limit_caused_by]
-        # Reserve first five entries of t_params.limit_caused_by for accuracy,
-        # max_increase_factor, max_increase_factor_near_fail, minimum_dt and maximum_dt
-        # limits.
-        this_limit_caused_by = CFL_limit_caused_by + 5
+        # Reserve first five entries of t_params.limit_caused_by for max_increase_factor,
+        # max_increase_factor_near_last_fail, minimum_dt, maximum_dt limits and
+        # high_nl_iterations, then the next `n_variables` for RK accuracy limits.
+        this_limit_caused_by = CFL_limit_caused_by + 5 + t_params.n_variables
     end
 
     if error_norm_method == "Linf"
@@ -717,10 +984,12 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
         error_norms = MPI.Reduce(error_norms, max, comm_block[]; root=0)
 
         error_norm = nothing
+        max_error_variable_index = -1
         @serial_region begin
             # Get maximum error over all blocks
             error_norms = MPI.Allreduce(error_norms, max, comm_inter_block[])
-            error_norm = maximum(error_norms)
+            max_error_variable_index = argmax(error_norms)
+            error_norm = error_norms[max_error_variable_index]
         end
         error_norm = MPI.bcast(error_norm, 0, comm_block[])
     elseif error_norm_method == "L2"
@@ -728,6 +997,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
         error_norms = MPI.Reduce(error_norms, +, comm_block[]; root=0)
 
         error_norm = nothing
+        max_error_variable_index = -1
         @serial_region begin
             # Get maximum error over all blocks
             error_norms = MPI.Allreduce(error_norms, +, comm_inter_block[])
@@ -740,6 +1010,9 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
             # larger number of points in the distribution functions does not mean that
             # error on the moments is ignored.
             error_norm = mean(error_norms)
+
+            # Record which variable had the maximum error
+            max_error_variable_index = argmax(error_norms)
         end
 
         error_norm = MPI.bcast(error_norm, 0, comm_block[])
@@ -747,10 +1020,54 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
         error("Unrecognized error_norm_method '$method'")
     end
 
-    # Use current_dt instead of t_params.dt[] here because we are about to write to
-    # the shared-memory variable t_params.dt[] below, and we do not want to add an extra
-    # _block_synchronize() call after reading it here.
-    if error_norm > 1.0 && current_dt > t_params.minimum_dt
+    just_completed_output_step = false
+
+    if !success
+        # Iteration failed in implicit part of timestep, so try decreasing timestep
+
+        # Set scratch[end] equal to scratch[1] to start the timestep over
+        scratch_temp = scratch[t_params.n_rk_stages+1]
+        scratch[t_params.n_rk_stages+1] = scratch[1]
+        scratch[1] = scratch_temp
+
+        @serial_region begin
+            t_params.failure_counter[] += 1
+
+            if t_params.previous_dt[] > 0.0
+                # If previous_dt=0, the previous step was also a failure so only update
+                # dt_before_last_fail when previous_dt>0
+                t_params.dt_before_last_fail[] = t_params.previous_dt[]
+            end
+
+            # If we were trying to take a step to the output timestep, dt will be smaller on
+            # the re-try, so will not reach the output time.
+            t_params.step_to_output[] = false
+
+            # Decrease timestep by 1/2 - this factor should probably be settable!
+            # Note when nonlinear solve iteration fails, we do not enforce
+            # minimum_dt, as the timesolver must error if we do not decrease dt.
+            if t_params.dt[] > t_params.minimum_dt
+                # ...but try decreasing just to minimum_dt first, if the dt is still
+                # bigger than this.
+                t_params.dt[] = max(t_params.dt[] / 2.0, t_params.minimum_dt)
+            else
+                t_params.dt[] = t_params.dt[] / 2.0
+            end
+
+            # Don't update the simulation time, as this step failed
+            t_params.previous_dt[] = 0.0
+
+            # Record this failure in the last entry of failure_caused_by, which counts
+            # failures where the nonlinear iteration did not converge
+            t_params.failure_caused_by[end] += 1
+        end
+    elseif (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt * (1.0 + 1.0e-13)
+        # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when
+        # t+dt=next_output_time.
+        # Use current_dt instead of t_params.dt[] here because we are about to write to
+        # the shared-memory variable t_params.dt[] below, and we do not want to add an
+        # extra _block_synchronize() call after reading it here.
+        #
         # Timestep failed, reduce timestep and re-try
 
         # Set scratch[end] equal to scratch[1] to start the timestep over
@@ -777,20 +1094,11 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
                                 t_params.dt[] * t_params.step_update_prefactor * error_norm^(-1.0/t_params.rk_order))
             t_params.dt[] = max(t_params.dt[], t_params.minimum_dt)
 
-            minimum_dt = 1.e-14
-            if t_params.dt[] < minimum_dt
-                println("Time advance failed: trying to set dt=$(t_params.dt[]) less than "
-                        * "$minimum_dt at t=$t. Ending run.")
-                # Set dt negative to signal an error
-                t_params.dt[] = -1.0
-            end
-
             # Don't update the simulation time, as this step failed
             t_params.previous_dt[] = 0.0
 
             # Call the 'cause' of the timestep failure the variable that has the biggest
             # error norm here
-            max_error_variable_index = argmax(error_norms)
             t_params.failure_caused_by[max_error_variable_index] += 1
 
             #println("t=$t, timestep failed, error_norm=$error_norm, error_norms=$error_norms, decreasing timestep to ", t_params.dt[])
@@ -810,6 +1118,8 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
                 if t_params.dt[] > CFL_limit
                     t_params.dt[] = CFL_limit
                 end
+
+                just_completed_output_step = true
             else
                 # Adjust timestep according to Fehlberg's suggestion
                 # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method).
@@ -821,12 +1131,15 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
                 if t_params.dt[] > CFL_limit
                     t_params.dt[] = CFL_limit
                 else
-                    this_limit_caused_by = 1
+                    # Reserve first five entries of t_params.limit_caused_by for
+                    # max_increase_factor, max_increase_factor_near_last_fail, minimum_dt,
+                    # maximum_dt and high_nl_iterations limits.
+                    this_limit_caused_by = 5 + max_error_variable_index
                 end
 
                 # Limit so timestep cannot increase by a large factor, which might lead to
                 # numerical instability in some cases.
-                max_cap_limit_caused_by = 2
+                max_cap_limit_caused_by = 1
                 if isinf(t_params.max_increase_factor_near_last_fail)
                     # Not using special timestep limiting near last failed dt value
                     max_cap = t_params.max_increase_factor * t_params.previous_dt[]
@@ -843,7 +1156,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
                         max_cap = max(slow_increase_threshold,
                                       t_params.max_increase_factor_near_last_fail *
                                       t_params.previous_dt[])
-                        max_cap_limit_caused_by = 3
+                        max_cap_limit_caused_by = 2
                     end
                 end
                 if t_params.dt[] > max_cap
@@ -854,13 +1167,24 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
                 # Prevent timestep from going below minimum_dt
                 if t_params.dt[] < t_params.minimum_dt
                     t_params.dt[] = t_params.minimum_dt
-                    this_limit_caused_by = 4
+                    this_limit_caused_by = 3
                 end
 
                 # Prevent timestep from going above maximum_dt
                 if t_params.dt[] > t_params.maximum_dt
                     t_params.dt[] = t_params.maximum_dt
-                    this_limit_caused_by = 5
+                    this_limit_caused_by = 4
+                end
+
+                if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0
+                    # The last step took many nonlinear iterations, so do not allow the
+                    # timestep to increase.
+                    # If t_params.previous_dt[]==0.0, then the previous step failed so
+                    # timestep will not be increasing, so do not need this check.
+                    if t_params.dt[] > t_params.previous_dt[]
+                        t_params.dt[] = t_params.previous_dt[]
+                        this_limit_caused_by = 5
+                    end
                 end
 
                 t_params.limit_caused_by[this_limit_caused_by] += 1
@@ -875,9 +1199,20 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er
     end
 
     @serial_region begin
-        if t + t_params.dt[] >= t_params.next_output_time[]
+        minimum_dt = 1.e-14
+        if t_params.dt[] < minimum_dt
+            println("Time advance failed: trying to set dt=$(t_params.dt[]) less than "
+                    * "$minimum_dt at t=$t. Ending run.")
+            # Set dt negative to signal an error
+            t_params.dt[] = -1.0
+        end
+
+        current_time = t + t_params.previous_dt[]
+        if (!t_params.write_after_fixed_step_count && !just_completed_output_step
+            && (current_time + t_params.dt[] >= t_params.next_output_time[]))
+
             t_params.dt_before_output[] = t_params.dt[]
-            t_params.dt[] = t_params.next_output_time[] - t
+            t_params.dt[] = t_params.next_output_time[] - current_time
             t_params.step_to_output[] = true
         end
     end
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 27eec4a0f..2460dba15 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -9,26 +9,27 @@ export setup_dummy_and_buffer_arrays
 
 using MPI
 using ..type_definitions: mk_float, mk_int
-using ..array_allocation: allocate_float, allocate_shared_float, allocate_shared_bool
+using ..array_allocation: allocate_float, allocate_shared_float, allocate_shared_int, allocate_shared_bool
 using ..communication
 using ..communication: _block_synchronize
 using ..debugging
 using ..file_io: write_data_to_ascii, write_all_moments_data_to_binary, write_all_dfns_data_to_binary, debug_dump
 using ..looping
 using ..moment_kinetics_structs: scratch_pdf
-using ..velocity_moments: update_moments!, update_moments_neutral!, reset_moments_status!
+using ..velocity_moments: update_moments!, update_moments_neutral!, reset_moments_status!, update_derived_moments!, update_derived_moments_neutral!
 using ..velocity_moments: update_density!, update_upar!, update_ppar!, update_pperp!, update_qpar!, update_vth!
 using ..velocity_moments: update_neutral_density!, update_neutral_qz!
 using ..velocity_moments: update_neutral_uzeta!, update_neutral_uz!, update_neutral_ur!
 using ..velocity_moments: update_neutral_pzeta!, update_neutral_pz!, update_neutral_pr!
 using ..velocity_moments: calculate_ion_moment_derivatives!, calculate_neutral_moment_derivatives!
-using ..velocity_moments: update_chodura!
 using ..velocity_grid_transforms: vzvrvzeta_to_vpavperp!, vpavperp_to_vzvrvzeta!
 using ..boundary_conditions: enforce_boundary_conditions!
 using ..boundary_conditions: enforce_neutral_boundary_conditions!
+using ..boundary_conditions: vpagrid_to_dzdt, enforce_v_boundary_condition_local!
 using ..input_structs
 using ..moment_constraints: hard_force_moment_constraints!,
-                            hard_force_moment_constraints_neutral!
+                            hard_force_moment_constraints_neutral!,
+                            moment_constraints_on_residual!
 using ..advection: setup_advection
 using ..z_advection: update_speed_z!, z_advection!
 using ..r_advection: update_speed_r!, r_advection!
@@ -36,11 +37,17 @@ using ..neutral_r_advection: update_speed_neutral_r!, neutral_advection_r!
 using ..neutral_z_advection: update_speed_neutral_z!, neutral_advection_z!
 using ..neutral_vz_advection: update_speed_neutral_vz!, neutral_advection_vz!
 using ..vperp_advection: update_speed_vperp!, vperp_advection!
-using ..vpa_advection: update_speed_vpa!, vpa_advection!
-using ..charge_exchange: charge_exchange_collisions_1V!, charge_exchange_collisions_3V!
-using ..ionization: ionization_collisions_1V!, ionization_collisions_3V!, constant_ionization_source!
+using ..vpa_advection: update_speed_vpa!, vpa_advection!, implicit_vpa_advection!
+using ..charge_exchange: ion_charge_exchange_collisions_1V!,
+                         neutral_charge_exchange_collisions_1V!,
+                         ion_charge_exchange_collisions_3V!,
+                         neutral_charge_exchange_collisions_3V!
+using ..ionization: ion_ionization_collisions_1V!, neutral_ionization_collisions_1V!,
+                    ion_ionization_collisions_3V!, neutral_ionization_collisions_3V!,
+                    constant_ionization_source!
 using ..krook_collisions: krook_collisions!
 using ..external_sources
+using ..nonlinear_solvers
 using ..numerical_dissipation: vpa_boundary_buffer_decay!,
                                vpa_boundary_buffer_diffusion!, vpa_dissipation!,
                                z_dissipation!, r_dissipation!, vperp_dissipation!,
@@ -59,7 +66,7 @@ using ..gyroaverages: init_gyro_operators, gyroaverage_pdf!
 using ..manufactured_solns: manufactured_sources
 using ..advection: advection_info
 using ..runge_kutta: rk_update_evolved_moments!, rk_update_evolved_moments_neutral!,
-                     rk_update_variable!, rk_error_variable!,
+                     rk_update_variable!, rk_loworder_solution!,
                      setup_runge_kutta_coefficients!, local_error_norm,
                      adaptive_timestep_update_t_params!
 using ..utils: to_minutes, get_minimum_CFL_z, get_minimum_CFL_vpa,
@@ -131,6 +138,13 @@ struct scratch_dummy_arrays
     # needs to be shared memory
     buffer_vpavperpzrs_1::MPISharedArray{mk_float,5}
     buffer_vpavperpzrs_2::MPISharedArray{mk_float,5}
+    # buffers to hold ion pdf for implicit solves
+    implicit_buffer_vpavperpzrs_1::MPISharedArray{mk_float,5}
+    implicit_buffer_vpavperpzrs_2::MPISharedArray{mk_float,5}
+    implicit_buffer_vpavperpzrs_3::MPISharedArray{mk_float,5}
+    implicit_buffer_vpavperpzrs_4::MPISharedArray{mk_float,5}
+    implicit_buffer_vpavperpzrs_5::MPISharedArray{mk_float,5}
+    implicit_buffer_vpavperpzrs_6::MPISharedArray{mk_float,5}
     
     buffer_vzvrvzetazsn_1::MPISharedArray{mk_float,5}
     buffer_vzvrvzetazsn_2::MPISharedArray{mk_float,5}
@@ -155,6 +169,8 @@ struct scratch_dummy_arrays
     buffer_vpavperp_2::MPISharedArray{mk_float,2}
     buffer_vpavperp_3::MPISharedArray{mk_float,2}
 
+    int_buffer_rs_1::MPISharedArray{mk_int,2}
+    int_buffer_rs_2::MPISharedArray{mk_int,2}
 end 
 
 struct advect_object_struct
@@ -232,12 +248,13 @@ end
 
 Create a [`input_structs.time_info`](@ref) struct using the settings in `t_input`.
 """
-function setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_reload,
-                         manufactured_solns_input, io_input)
-    rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor =
-        setup_runge_kutta_coefficients!(t_input.type,
-                                        t_input.CFL_prefactor,
-                                        t_input.split_operators)
+function setup_time_info(t_input, n_variables, code_time, dt_reload,
+                         dt_before_last_fail_reload, manufactured_solns_input, io_input)
+    rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order,
+    adaptive, low_storage, CFL_prefactor =
+        setup_runge_kutta_coefficients!(t_input["type"],
+                                        t_input["CFL_prefactor"],
+                                        t_input["split_operators"])
 
     if !adaptive
         # No adaptive timestep, want to use the value from the input file even when we are
@@ -252,52 +269,69 @@ function setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_relo
     dt_before_last_fail = allocate_shared_float(1)
     step_to_output = allocate_shared_bool(1)
     if block_rank[] == 0
-        dt_shared[] = dt_reload === nothing ? t_input.dt : dt_reload
-        previous_dt_shared[] = dt_reload === nothing ? t_input.dt : dt_reload
+        dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload
+        previous_dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload
         next_output_time[] = 0.0
-        dt_before_output[] = dt_reload === nothing ? t_input.dt : dt_reload
+        dt_before_output[] = dt_reload === nothing ? t_input["dt"] : dt_reload
         dt_before_last_fail[] = dt_before_last_fail_reload === nothing ? Inf : dt_before_last_fail_reload
         step_to_output[] = false
     end
     _block_synchronize()
 
-    end_time = code_time + t_input.dt * t_input.nstep
+    end_time = code_time + t_input["dt"] * t_input["nstep"]
     epsilon = 1.e-11
-    if t_input.nwrite == 0
-        moments_output_times = [end_time]
-    else
-        moments_output_times = [code_time + i*t_input.dt
-                                for i ∈ t_input.nwrite:t_input.nwrite:t_input.nstep]
-    end
-    if moments_output_times[end] < end_time - epsilon
-        push!(moments_output_times, end_time)
-    end
-    if t_input.nwrite_dfns == 0
-        dfns_output_times = [end_time]
+    if adaptive && !t_input["write_after_fixed_step_count"]
+        if t_input["nwrite"] == 0
+            moments_output_times = [end_time]
+        else
+            moments_output_times = [code_time + i*t_input["dt"]
+                                    for i ∈ t_input["nwrite"]:t_input["nwrite"]:t_input["nstep"]]
+        end
+        if moments_output_times[end] < end_time - epsilon
+            push!(moments_output_times, end_time)
+        end
+        if t_input["nwrite_dfns"] == 0
+            dfns_output_times = [end_time]
+        else
+            dfns_output_times = [code_time + i*t_input["dt"]
+                                 for i ∈ t_input["nwrite_dfns"]:t_input["nwrite_dfns"]:t_input["nstep"]]
+        end
+        if dfns_output_times[end] < end_time - epsilon
+            push!(dfns_output_times, end_time)
+        end
     else
-        dfns_output_times = [code_time + i*t_input.dt
-                             for i ∈ t_input.nwrite_dfns:t_input.nwrite_dfns:t_input.nstep]
+        # Use nwrite_moments and nwrite_dfns to determine when to write output
+        moments_output_times = mk_float[]
+        dfns_output_times = mk_float[]
     end
-    if dfns_output_times[end] < end_time - epsilon
-        push!(dfns_output_times, end_time)
+
+    if rk_coefs_implicit === nothing
+        # Not an IMEX scheme, so cannot have any implicit terms
+        t_input["implicit_ion_advance"] = false
+        t_input["implicit_vpa_advection"] = false
     end
 
-    if t_input.high_precision_error_sum
+    if t_input["high_precision_error_sum"]
         error_sum_zero = Float128(0.0)
     else
         error_sum_zero = 0.0
     end
-    return time_info(t_input.nstep, end_time, dt_shared, previous_dt_shared, next_output_time,
-                     dt_before_output, dt_before_last_fail, CFL_prefactor, step_to_output,
-                     Ref(0), Ref(0), mk_int[], mk_int[], moments_output_times,
-                     dfns_output_times, t_input.type, rk_coefs, n_rk_stages, rk_order,
-                     adaptive, low_storage, t_input.rtol, t_input.atol, t_input.atol_upar,
-                     t_input.step_update_prefactor, t_input.max_increase_factor,
-                     t_input.max_increase_factor_near_last_fail,
-                     t_input.last_fail_proximity_factor, t_input.minimum_dt,
-                     t_input.maximum_dt, error_sum_zero, t_input.split_operators,
-                     t_input.steady_state_residual, t_input.converged_residual_value,
-                     manufactured_solns_input.use_for_advance, t_input.stopfile_name)
+    return time_info(n_variables, t_input["nstep"], end_time, dt_shared, previous_dt_shared,
+                     next_output_time, dt_before_output, dt_before_last_fail,
+                     CFL_prefactor, step_to_output, Ref(0), Ref(0), mk_int[], mk_int[],
+                     t_input["nwrite"], t_input["nwrite_dfns"], moments_output_times,
+                     dfns_output_times, t_input["type"], rk_coefs, rk_coefs_implicit,
+                     implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive,
+                     low_storage, t_input["rtol"], t_input["atol"], t_input["atol_upar"],
+                     t_input["step_update_prefactor"], t_input["max_increase_factor"],
+                     t_input["max_increase_factor_near_last_fail"],
+                     t_input["last_fail_proximity_factor"], t_input["minimum_dt"],
+                     t_input["maximum_dt"], t_input["implicit_ion_advance"],
+                     t_input["implicit_vpa_advection"],
+                     t_input["write_after_fixed_step_count"], error_sum_zero,
+                     t_input["split_operators"], t_input["steady_state_residual"],
+                     t_input["converged_residual_value"],
+                     manufactured_solns_input.use_for_advance, t_input["stopfile_name"])
 end
 
 """
@@ -314,58 +348,102 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                              dt_before_last_fail_reload, collisions, species, geometry,
                              boundary_distributions, external_source_settings,
                              num_diss_params, manufactured_solns_input, advection_structs,
-                             scratch_dummy, restarting)
+                             scratch_dummy, restarting, input_dict)
     # define some local variables for convenience/tidiness
     n_ion_species = composition.n_ion_species
     n_neutral_species = composition.n_neutral_species
     ion_mom_diss_coeff = num_diss_params.ion.moment_dissipation_coefficient
-    electron_mom_diss_coeff = num_diss_params.electron.moment_dissipation_coefficient
     neutral_mom_diss_coeff = num_diss_params.neutral.moment_dissipation_coefficient
 
-    t_params = setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_reload,
-                               manufactured_solns_input, io_input)
+    n_variables = 1 # pdf
+    if moments.evolve_density
+        # ion density
+        n_variables += 1
+    end
+    if moments.evolve_upar
+        # ion flow
+        n_variables += 1
+    end
+    if moments.evolve_ppar
+        # ion pressure
+        n_variables += 1
+    end
+    if composition.n_neutral_species > 0
+        # neutral pdf
+        n_variables += 1
+        if moments.evolve_density
+            # neutral density
+            n_variables += 1
+        end
+        if moments.evolve_upar
+            # neutral flow
+            n_variables += 1
+        end
+        if moments.evolve_ppar
+            # neutral pressure
+            n_variables += 1
+        end
+    end
+    t_params = setup_time_info(t_input, n_variables, code_time, dt_reload,
+                               dt_before_last_fail_reload, manufactured_solns_input,
+                               io_input)
 
     # Make Vectors that count which variable caused timestep limits and timestep failures
     # the right length. Do this setup even when not using adaptive timestepping, because
     # it is easier than modifying the file I/O according to whether we are using adaptive
     # timestepping.
     #
-    # Entries for limit by accuracy (which is an average over all variables),
-    # max_increase_factor, minimum_dt and maximum_dt
+    # Entries for limit by max_increase_factor, max_increase_factor_near_last_fail,
+    # minimum_dt, maximum_dt and high_nl_iterations.
     push!(t_params.limit_caused_by, 0, 0, 0, 0, 0)
 
     # ion pdf
-    push!(t_params.limit_caused_by, 0, 0)
+    push!(t_params.limit_caused_by, 0) # RK accuracy
+    if !t_params.implicit_ion_advance
+        push!(t_params.limit_caused_by, 0) # z-advection CFL limit
+    end
+    if !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection)
+        push!(t_params.limit_caused_by, 0) # vpa-advection CFL limit
+    end
     push!(t_params.failure_caused_by, 0)
     if moments.evolve_density
         # ion density
+        push!(t_params.limit_caused_by, 0) # RK accuracy
         push!(t_params.failure_caused_by, 0)
     end
     if moments.evolve_upar
         # ion flow
+        push!(t_params.limit_caused_by, 0) # RK accuracy
         push!(t_params.failure_caused_by, 0)
     end
     if moments.evolve_ppar
         # ion pressure
+        push!(t_params.limit_caused_by, 0) # RK accuracy
         push!(t_params.failure_caused_by, 0)
     end
     if composition.n_neutral_species > 0
         # neutral pdf
-        push!(t_params.limit_caused_by, 0, 0)
+        push!(t_params.limit_caused_by, 0, 0, 0) # RK accuracy plus 2 CFL limits
         push!(t_params.failure_caused_by, 0)
         if moments.evolve_density
             # neutral density
+            push!(t_params.limit_caused_by, 0) # RK accuracy
             push!(t_params.failure_caused_by, 0)
         end
         if moments.evolve_upar
             # neutral flow
+            push!(t_params.limit_caused_by, 0) # RK accuracy
             push!(t_params.failure_caused_by, 0)
         end
         if moments.evolve_ppar
             # neutral pressure
+            push!(t_params.limit_caused_by, 0) # RK accuracy
             push!(t_params.failure_caused_by, 0)
         end
     end
+    if t_params.rk_coefs_implicit !== nothing
+        push!(t_params.failure_caused_by, 0) # Nonlinear iteration fails to converge
+    end
 
     # create the 'advance' struct to be used in later Euler advance to
     # indicate which parts of the equations are to be advanced concurrently.
@@ -375,12 +453,69 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                                   external_source_settings, num_diss_params,
                                   manufactured_solns_input, r, z, vperp, vpa, vzeta, vr,
                                   vz)
+    advance_implicit =
+        setup_implicit_advance_flags(moments, composition, t_params, collisions,
+                                     external_source_settings, num_diss_params,
+                                     manufactured_solns_input, r, z, vperp, vpa, vzeta,
+                                     vr, vz)
+    # Check that no flags that shouldn't be are set in both advance and advance_implicit
+    for field ∈ fieldnames(advance_info)
+        if field ∈ (:r_diffusion, :vpa_diffusion, :vperp_diffusion, :vz_diffusion)
+            # These are meant to be set in both structs
+            continue
+        end
+        if getfield(advance, field) && getfield(advance_implicit, field)
+            error("$field is set to `true` in both `advance` and `advance_implicit`")
+        end
+    end
+
+    # Set up parameters for Jacobian-free Newton-Krylov solver used for implicit part of
+    # timesteps.
+    if t_params.implicit_ion_advance
+        # Nonlinear solver for the implicit ion advance. Unlike the vpa_advection solver
+        # below, this is set up over (s, r, z, vperp, vpa) without `serial_solve=true`.
+        nl_solver_ion_advance_params =
+            setup_nonlinear_solve(input_dict,
+                                  (s=composition.n_ion_species, r=r, z=z, vperp=vperp,
+                                   vpa=vpa),
+                                  ();
+                                  default_rtol=t_params.rtol / 10.0,
+                                  default_atol=t_params.atol / 10.0,
+                                  preconditioner_type="lu")
+    else
+        nl_solver_ion_advance_params = nothing
+    end
+    if t_params.implicit_vpa_advection
+        # Implicit solve for vpa_advection term should be done in serial, as it will be
+        # called within a parallelised s_r_z_vperp loop.
+        nl_solver_vpa_advection_params =
+            setup_nonlinear_solve(input_dict, (vpa=vpa,),
+                                  (composition.n_ion_species, r, z, vperp);
+                                  default_rtol=t_params.rtol / 10.0,
+                                  default_atol=t_params.atol / 10.0,
+                                  serial_solve=true, preconditioner_type="lu")
+    else
+        nl_solver_vpa_advection_params = nothing
+    end
+    if nl_solver_ion_advance_params !== nothing &&
+            nl_solver_vpa_advection_params !== nothing
+        error("Cannot use implicit_ion_advance and implicit_vpa_advection at the same "
+              * "time")
+    end
+    nl_solver_params = (ion_advance=nl_solver_ion_advance_params,
+                        vpa_advection=nl_solver_vpa_advection_params,)
 
     begin_serial_region()
 
     # create an array of structs containing scratch arrays for the pdf and low-order moments
     # that may be evolved separately via fluid equations
-    scratch = setup_scratch_arrays(moments, pdf.ion.norm, pdf.neutral.norm, t_params.n_rk_stages)
+    n_rk_stages = t_params.n_rk_stages
+    scratch = setup_scratch_arrays(moments, pdf, n_rk_stages + 1)
+    if t_params.rk_coefs_implicit !== nothing
+        scratch_implicit = setup_scratch_arrays(moments, pdf, n_rk_stages)
+    else
+        scratch_implicit = nothing
+    end
     # setup dummy arrays & buffer arrays for z r MPI
     n_neutral_species_alloc = max(1,composition.n_neutral_species)
     # create arrays for Fokker-Planck collisions 
@@ -602,8 +737,9 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
     # Ensure all processes are synchronized at the end of the setup
     _block_synchronize()
 
-    return moments, spectral_objects, scratch, advance, t_params, fp_arrays, gyroavs,
-           manufactured_source_list
+    return moments, spectral_objects, scratch, scratch_implicit, advance,
+           advance_implicit, t_params, fp_arrays, gyroavs, manufactured_source_list,
+           nl_solver_params
 end
 
 """
@@ -620,14 +756,19 @@ function setup_advance_flags(moments, composition, t_params, collisions,
     advance_vperp_advection = false
     advance_z_advection = false
     advance_r_advection = false
-    advance_cx_1V = false
-    advance_cx = false
-    advance_ionization = false
-    advance_ionization_1V = false
+    advance_ion_cx_1V = false
+    advance_neutral_cx_1V = false
+    advance_ion_cx = false
+    advance_neutral_cx = false
+    advance_ion_ionization = false
+    advance_neutral_ionization = false
+    advance_ion_ionization_1V = false
+    advance_neutral_ionization_1V = false
     advance_ionization_source = false
     advance_krook_collisions_ii = false
     advance_external_source = false
-    advance_numerical_dissipation = false
+    advance_ion_numerical_dissipation = false
+    advance_neutral_numerical_dissipation = false
     advance_sources = false
     advance_continuity = false
     advance_force_balance = false
@@ -649,11 +790,13 @@ function setup_advance_flags(moments, composition, t_params, collisions,
     # otherwise, check to see if the flags need to be set to true
     if !t_params.split_operators
         # default for non-split operators is to include both vpa and z advection together
-        advance_vpa_advection = vpa.n > 1 && z.n > 1
-        advance_vperp_advection = vperp.n > 1 && z.n > 1
-        advance_z_advection = z.n > 1
-        advance_r_advection = r.n > 1
-        if collisions.fkpl.nuii > 0.0 && vperp.n > 1 
+        # If using an IMEX scheme and implicit vpa advection has been requested, then vpa
+        # advection is not included in the explicit part of the timestep.
+        advance_vpa_advection = vpa.n > 1 && z.n > 1 && !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection)
+        advance_vperp_advection = vperp.n > 1 && z.n > 1 && !t_params.implicit_ion_advance
+        advance_z_advection = z.n > 1 && !t_params.implicit_ion_advance
+        advance_r_advection = r.n > 1 && !t_params.implicit_ion_advance
+        if collisions.fkpl.nuii > 0.0 && vperp.n > 1 && !t_params.implicit_ion_advance
             explicit_weakform_fp_collisions = true
         else
             explicit_weakform_fp_collisions = false    
@@ -672,9 +815,11 @@ function setup_advance_flags(moments, composition, t_params, collisions,
             # account for charge exchange collisions
             if abs(collisions.charge_exchange) > 0.0
                 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1
-                    advance_cx_1V = true
+                    advance_ion_cx_1V = !t_params.implicit_ion_advance
+                    advance_neutral_cx_1V = true
                 elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1
-                    advance_cx = true
+                    advance_ion_cx = !t_params.implicit_ion_advance
+                    advance_neutral_cx = true
                 else
                     error("If any perpendicular velocity has length>1 they all must. "
                           * "If all perpendicular velocities have length=1, then vpa and "
@@ -687,9 +832,11 @@ function setup_advance_flags(moments, composition, t_params, collisions,
             # account for ionization collisions
             if abs(collisions.ionization) > 0.0
                 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1
-                    advance_ionization_1V = true
+                    advance_ion_ionization_1V = !t_params.implicit_ion_advance
+                    advance_neutral_ionization_1V = true
                 elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1
-                    advance_ionization = true
+                    advance_ion_ionization = !t_params.implicit_ion_advance
+                    advance_neutral_ionization = true
                 else
                     error("If any perpendicular velocity has length>1 they all must. "
                           * "If all perpendicular velocities have length=1, then vpa and "
@@ -700,20 +847,21 @@ function setup_advance_flags(moments, composition, t_params, collisions,
             end
         end
         # exception for the case where ions are evolved alone but sourced by ionization
-        if collisions.ionization > 0.0 && collisions.constant_ionization_rate
+        if collisions.ionization > 0.0 && collisions.constant_ionization_rate && !t_params.implicit_ion_advance
             advance_ionization_source = true
         end
         if collisions.krook.nuii0 > 0.0
-            advance_krook_collisions_ii = true
+            advance_krook_collisions_ii = !t_params.implicit_ion_advance
         end
-        advance_external_source = external_source_settings.ion.active
+        advance_external_source = external_source_settings.ion.active && !t_params.implicit_ion_advance
         advance_neutral_external_source = external_source_settings.neutral.active
-        advance_numerical_dissipation = true
+        advance_ion_numerical_dissipation = !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection)
+        advance_neutral_numerical_dissipation = true
         # if evolving the density, must advance the continuity equation,
         # in addition to including sources arising from the use of a modified distribution
         # function in the kinetic equation
         if moments.evolve_density
-            advance_sources = true
+            advance_sources = !t_params.implicit_ion_advance
             advance_continuity = true
             if composition.n_neutral_species > 0
                 advance_neutral_sources = true
@@ -724,7 +872,7 @@ function setup_advance_flags(moments, composition, t_params, collisions,
         # in addition to including sources arising from the use of a modified distribution
         # function in the kinetic equation
         if moments.evolve_upar
-            advance_sources = true
+            advance_sources = !t_params.implicit_ion_advance
             advance_force_balance = true
             if composition.n_neutral_species > 0
                 advance_neutral_sources = true
@@ -735,7 +883,7 @@ function setup_advance_flags(moments, composition, t_params, collisions,
         # in addition to including sources arising from the use of a modified distribution
         # function in the kinetic equation
         if moments.evolve_ppar
-            advance_sources = true
+            advance_sources = !t_params.implicit_ion_advance
             advance_energy = true
             if composition.n_neutral_species > 0
                 advance_neutral_sources = true
@@ -743,28 +891,157 @@ function setup_advance_flags(moments, composition, t_params, collisions,
             end
         end
 
+        # *_diffusion flags are set regardless of whether diffusion is included in explicit or
+        # implicit part of timestep, because they are used for boundary conditions, not to
+        # control which terms are advanced.
+        #
         # flag to determine if a d^2/dr^2 operator is present
-        r_diffusion = (advance_numerical_dissipation && num_diss_params.ion.r_dissipation_coefficient > 0.0)
+        r_diffusion = (num_diss_params.ion.r_dissipation_coefficient > 0.0)
         # flag to determine if a d^2/dvpa^2 operator is present
-        vpa_diffusion = ((advance_numerical_dissipation && num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions)
-        vperp_diffusion = ((advance_numerical_dissipation && num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions)
-        vz_diffusion = (advance_numerical_dissipation && num_diss_params.neutral.vz_dissipation_coefficient > 0.0)
+        # When using implicit_vpa_advection, the vpa diffusion is included in the implicit
+        # step
+        vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1))
+        vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1))
+        vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0)
+    end
+
+    manufactured_solns_test = manufactured_solns_input.use_for_advance
+
+    return advance_info(advance_vpa_advection, advance_vperp_advection, advance_z_advection, advance_r_advection,
+                        advance_neutral_z_advection, advance_neutral_r_advection,
+                        advance_neutral_vz_advection, advance_ion_cx, advance_neutral_cx,
+                        advance_ion_cx_1V, advance_neutral_cx_1V, advance_ion_ionization,
+                        advance_neutral_ionization, advance_ion_ionization_1V,
+                        advance_neutral_ionization_1V, advance_ionization_source,
+                        advance_krook_collisions_ii,
+                        explicit_weakform_fp_collisions,
+                        advance_external_source, advance_ion_numerical_dissipation,
+                        advance_neutral_numerical_dissipation, advance_sources,
+                        advance_continuity, advance_force_balance, advance_energy,
+                        advance_neutral_external_source, advance_neutral_sources,
+                        advance_neutral_continuity, advance_neutral_force_balance,
+                        advance_neutral_energy, manufactured_solns_test, r_diffusion,
+                        vpa_diffusion, vperp_diffusion, vz_diffusion)
+end
+
+"""
+create the 'advance_info' struct to be used in the time advance to
+indicate which parts of the equations are to be advanced implicitly (using
+`backward_euler!()`).
+"""
+function setup_implicit_advance_flags(moments, composition, t_params, collisions,
+                                      external_source_settings, num_diss_params,
+                                      manufactured_solns_input, r, z, vperp, vpa, vzeta,
+                                      vr, vz)
+    # default is not to concurrently advance different operators
+    advance_vpa_advection = false
+    advance_vperp_advection = false
+    advance_z_advection = false
+    advance_r_advection = false
+    advance_ion_cx_1V = false
+    advance_neutral_cx_1V = false
+    advance_ion_cx = false
+    advance_neutral_cx = false
+    advance_ion_ionization = false
+    advance_neutral_ionization = false
+    advance_ion_ionization_1V = false
+    advance_neutral_ionization_1V = false
+    advance_ionization_source = false
+    advance_krook_collisions_ii = false
+    advance_external_source = false
+    advance_ion_numerical_dissipation = false
+    advance_neutral_numerical_dissipation = false
+    advance_sources = false
+    advance_continuity = false
+    advance_force_balance = false
+    advance_energy = false
+    advance_neutral_z_advection = false
+    advance_neutral_r_advection = false
+    advance_neutral_vz_advection = false
+    advance_neutral_external_source = false
+    advance_neutral_sources = false
+    advance_neutral_continuity = false
+    advance_neutral_force_balance = false
+    advance_neutral_energy = false
+    r_diffusion = false
+    vpa_diffusion = false
+    vperp_diffusion = false
+    vz_diffusion = false
+    explicit_weakform_fp_collisions = false
+    if t_params.split_operators
+        error("Implicit timesteps do not support `t_params.split_operators=true`")
     end
+    if t_params.implicit_ion_advance
+        advance_vpa_advection = vpa.n > 1 && z.n > 1
+        advance_vperp_advection = vperp.n > 1 && z.n > 1
+        advance_z_advection = z.n > 1
+        advance_r_advection = r.n > 1
+        if abs(collisions.charge_exchange) > 0.0
+            if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1
+                advance_ion_cx_1V = true
+            elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1
+                advance_ion_cx = true
+            else
+                error("If any perpendicular velocity has length>1 they all must. "
+                      * "If all perpendicular velocities have length=1, then vpa and "
+                      * "vz should be the same.\n"
+                      * "vperp.n=$(vperp.n), vr.n=$(vr.n), vzeta.n=$(vzeta.n), "
+                      * "vpa.n=$(vpa.n), vz.n=$(vz.n)")
+            end
+        end
+        if abs(collisions.ionization) > 0.0
+            if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1
+                advance_ion_ionization_1V = true
+            elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1
+                advance_ion_ionization = true
+            else
+                error("If any perpendicular velocity has length>1 they all must. "
+                      * "If all perpendicular velocities have length=1, then vpa and "
+                      * "vz should be the same.\n"
+                      * "vperp.n=$(vperp.n), vr.n=$(vr.n), vzeta.n=$(vzeta.n), "
+                      * "vpa.n=$(vpa.n), vz.n=$(vz.n)")
+            end
+        end
+        advance_ionization_source = collisions.ionization > 0.0 && collisions.constant_ionization_rate
+        advance_krook_collisions_ii = collisions.krook.nuii0 > 0.0
+        advance_external_source = external_source_settings.ion.active
+        advance_ion_numerical_dissipation = true
+        advance_sources = moments.evolve_density || moments.evolve_upar || moments.evolve_ppar
+        explicit_weakform_fp_collisions = collisions.fkpl.nuii > 0.0 && vperp.n > 1
+    elseif t_params.implicit_vpa_advection
+        advance_vpa_advection = true
+        advance_ion_numerical_dissipation = true
+    end
+    # *_diffusion flags are set regardless of whether diffusion is included in explicit or
+    # implicit part of timestep, because they are used for boundary conditions, not to
+    # control which terms are advanced.
+    #
+    # flag to determine if a d^2/dr^2 operator is present
+    r_diffusion = (num_diss_params.ion.r_dissipation_coefficient > 0.0)
+    # flag to determine if a d^2/dvpa^2 operator is present
+    # When using implicit_vpa_advection, the vpa diffusion is included in the implicit
+    # step
+    vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1))
+    vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1))
+    vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0)
 
     manufactured_solns_test = manufactured_solns_input.use_for_advance
 
     return advance_info(advance_vpa_advection, advance_vperp_advection, advance_z_advection, advance_r_advection,
                         advance_neutral_z_advection, advance_neutral_r_advection,
-                        advance_neutral_vz_advection, advance_cx, advance_cx_1V,
-                        advance_ionization, advance_ionization_1V,
+                        advance_neutral_vz_advection, advance_ion_cx, advance_neutral_cx,
+                        advance_ion_cx_1V, advance_neutral_cx_1V, advance_ion_ionization,
+                        advance_neutral_ionization, advance_ion_ionization_1V,
+                        advance_neutral_ionization_1V,
                         advance_ionization_source, advance_krook_collisions_ii,
                         explicit_weakform_fp_collisions,
-                        advance_external_source, advance_numerical_dissipation,
-                        advance_sources, advance_continuity, advance_force_balance,
-                        advance_energy, advance_neutral_external_source,
-                        advance_neutral_sources, advance_neutral_continuity,
-                        advance_neutral_force_balance, advance_neutral_energy,
-                        manufactured_solns_test, r_diffusion, vpa_diffusion, vperp_diffusion, vz_diffusion)
+                        advance_external_source, advance_ion_numerical_dissipation,
+                        advance_neutral_numerical_dissipation, advance_sources,
+                        advance_continuity, advance_force_balance, advance_energy,
+                        advance_neutral_external_source, advance_neutral_sources,
+                        advance_neutral_continuity, advance_neutral_force_balance,
+                        advance_neutral_energy, manufactured_solns_test, r_diffusion,
+                        vpa_diffusion, vperp_diffusion, vz_diffusion)
 end
 
 function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies_ion,nspecies_neutral)
@@ -827,6 +1104,13 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies
 
     buffer_vpavperpzrs_1 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
     buffer_vpavperpzrs_2 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
+
+    implicit_buffer_vpavperpzrs_1 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
+    implicit_buffer_vpavperpzrs_2 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
+    implicit_buffer_vpavperpzrs_3 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
+    implicit_buffer_vpavperpzrs_4 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
+    implicit_buffer_vpavperpzrs_5 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
+    implicit_buffer_vpavperpzrs_6 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
     
     buffer_vzvrvzetazsn_1 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral)
     buffer_vzvrvzetazsn_2 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral)
@@ -849,6 +1133,9 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies
     buffer_vpavperp_2 = allocate_shared_float(nvpa,nvperp)
     buffer_vpavperp_3 = allocate_shared_float(nvpa,nvperp)
     
+    int_buffer_rs_1 = allocate_shared_int(nr,nspecies_ion)
+    int_buffer_rs_2 = allocate_shared_int(nr,nspecies_ion)
+
     return scratch_dummy_arrays(dummy_s,dummy_sr,dummy_vpavperp,dummy_zrs,dummy_zrsn,
         buffer_z_1,buffer_z_2,buffer_z_3,buffer_z_4,
         buffer_r_1,buffer_r_2,buffer_r_3,buffer_r_4,
@@ -860,10 +1147,12 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies
         buffer_vpavperpzs_1,buffer_vpavperpzs_2,buffer_vpavperpzs_3,buffer_vpavperpzs_4,buffer_vpavperpzs_5,buffer_vpavperpzs_6,
         buffer_vpavperprs_1,buffer_vpavperprs_2,buffer_vpavperprs_3,buffer_vpavperprs_4,buffer_vpavperprs_5,buffer_vpavperprs_6,
         buffer_vpavperpzrs_1,buffer_vpavperpzrs_2,
+        implicit_buffer_vpavperpzrs_1,implicit_buffer_vpavperpzrs_2,implicit_buffer_vpavperpzrs_3,implicit_buffer_vpavperpzrs_4,implicit_buffer_vpavperpzrs_5,implicit_buffer_vpavperpzrs_6,
         buffer_vzvrvzetazsn_1,buffer_vzvrvzetazsn_2,buffer_vzvrvzetazsn_3,buffer_vzvrvzetazsn_4,buffer_vzvrvzetazsn_5,buffer_vzvrvzetazsn_6,
         buffer_vzvrvzetarsn_1,buffer_vzvrvzetarsn_2,buffer_vzvrvzetarsn_3,buffer_vzvrvzetarsn_4,buffer_vzvrvzetarsn_5,buffer_vzvrvzetarsn_6,
         buffer_vzvrvzetazrsn_1, buffer_vzvrvzetazrsn_2,
-        buffer_vpavperp_1,buffer_vpavperp_2,buffer_vpavperp_3)
+        buffer_vpavperp_1,buffer_vpavperp_2,buffer_vpavperp_3,
+        int_buffer_rs_1,int_buffer_rs_2)
 
 end
 
@@ -895,16 +1184,19 @@ end
 create an array of structs containing scratch arrays for the normalised pdf and low-order moments
 that may be evolved separately via fluid equations
 """
-function setup_scratch_arrays(moments, pdf_ion_in, pdf_neutral_in, n_rk_stages)
-    # create n_rk_stages+1 structs, each of which will contain one pdf,
-    # one density, and one parallel flow array
-    scratch = Vector{scratch_pdf{5,3,6,3}}(undef, n_rk_stages+1)
-    pdf_dims = size(pdf_ion_in)
+function setup_scratch_arrays(moments, pdf, n)
+    # will create n structs (n_rk_stages+1 or n_rk_stages, depending on the caller), each
+    # of which will contain one pdf, density, parallel flow, parallel pressure, and
+    # perpendicular pressure array for ions, (possibly) the same for electrons, and the
+    # same for neutrals. The vector is allocated uninitialised here; its elements are
+    # filled in one per iteration of the loop below.
+    scratch = Vector{scratch_pdf{5,3,6,3}}(undef, n)
+    pdf_dims = size(pdf.ion.norm)
     moment_dims = size(moments.ion.dens)
-    pdf_neutral_dims = size(pdf_neutral_in)
+    pdf_neutral_dims = size(pdf.neutral.norm)
     moment_neutral_dims = size(moments.neutral.dens)
     # populate each of the structs
-    for istage ∈ 1:n_rk_stages+1
+    for istage ∈ 1:n
         # Allocate arrays in temporary variables so that we can identify them
         # by source line when using @debug_shared_array
         pdf_array = allocate_shared_float(pdf_dims...)
@@ -925,13 +1217,13 @@ function setup_scratch_arrays(moments, pdf_ion_in, pdf_neutral_in, n_rk_stages)
                                       pdf_neutral_array, density_neutral_array,
                                       uz_neutral_array, pz_neutral_array)
         @serial_region begin
-            scratch[istage].pdf .= pdf_ion_in
+            scratch[istage].pdf .= pdf.ion.norm
             scratch[istage].density .= moments.ion.dens
             scratch[istage].upar .= moments.ion.upar
             scratch[istage].ppar .= moments.ion.ppar
             scratch[istage].pperp .= moments.ion.pperp
 
-            scratch[istage].pdf_neutral .= pdf_neutral_in
+            scratch[istage].pdf_neutral .= pdf.neutral.norm
             scratch[istage].density_neutral .= moments.neutral.dens
             scratch[istage].uz_neutral .= moments.neutral.uz
             scratch[istage].pz_neutral .= moments.neutral.pz
@@ -948,11 +1240,13 @@ df/dt + δv⋅∂f/∂z = 0, with δv(z,t)=v(z,t)-v₀(z)
 for prudent choice of v₀, expect δv≪v so that explicit
 time integrator can be used without severe CFL condition
 """
-function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
-           moments, fields, spectral_objects, advect_objects,
-           composition, collisions, geometry, gyroavs, boundary_distributions, 
-           external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy,
-           manufactured_source_list, ascii_io, io_moments, io_dfns)
+function time_advance!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa,
+                       vperp, gyrophase, z, r, moments, fields, spectral_objects,
+                       advect_objects, composition, collisions, geometry, gyroavs,
+                       boundary_distributions, external_source_settings, num_diss_params,
+                       nl_solver_params, advance, advance_implicit, fp_arrays,
+                       scratch_dummy, manufactured_source_list, ascii_io, io_moments,
+                       io_dfns)
 
     @debug_detect_redundant_block_synchronize begin
         # Only want to check for redundant _block_synchronize() calls during the
@@ -986,9 +1280,11 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
     moments_output_counter = 1
     dfns_output_counter = 1
     @serial_region begin
-        t_params.next_output_time[] =
-            min(t_params.moments_output_times[moments_output_counter],
-                t_params.dfns_output_times[dfns_output_counter])
+        if t_params.adaptive && !t_params.write_after_fixed_step_count
+            t_params.next_output_time[] =
+                min(t_params.moments_output_times[moments_output_counter],
+                    t_params.dfns_output_times[dfns_output_counter])
+        end
     end
     _block_synchronize()
 
@@ -1004,22 +1300,37 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
     end
     while true
         
-        diagnostic_checks = (t + t_params.dt[] ≥ t_params.moments_output_times[moments_output_counter] - epsilon
-                             || t + t_params.dt[] ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon
-                             || t + t_params.dt[] ≥ t_params.end_time - epsilon)
+        if t_params.adaptive && !t_params.write_after_fixed_step_count
+            maybe_write_moments = (t + t_params.dt[] ≥ t_params.moments_output_times[moments_output_counter] - epsilon
+                                   || t + t_params.dt[] ≥ t_params.end_time - epsilon)
+            maybe_write_dfns = (t + t_params.dt[] ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon
+                                || t + t_params.dt[] ≥ t_params.end_time - epsilon)
+        else
+            maybe_write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0
+                                   || t_params.step_counter[] >= t_params.nstep)
+            maybe_write_dfns = (t_params.step_counter[] % t_params.nwrite_dfns == 0
+                                || t_params.step_counter[] >= t_params.nstep)
+        end
+        diagnostic_checks = (maybe_write_moments || maybe_write_dfns)
         
         if t_params.split_operators
             # MRH NOT SUPPORTED
-            time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z,
-                vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
-                composition, collisions, external_source_settings, num_diss_params,
-                advance, t_params.step_counter[])
+            time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_params,
+                                          vpa, z, vpa_spectral, z_spectral, moments,
+                                          fields, vpa_advect, z_advect, composition,
+                                          collisions, external_source_settings,
+                                          num_diss_params, nl_solver_params, advance,
+                                          advance_implicit, t_params.step_counter[])
         else
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
-                moments, fields, spectral_objects, advect_objects,
-                composition, collisions, geometry, gyroavs, boundary_distributions,
-                external_source_settings, num_diss_params, advance, fp_arrays,  scratch_dummy,
-                manufactured_source_list, diagnostic_checks, t_params.step_counter[])
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vz,
+                                       vr, vzeta, vpa, vperp, gyrophase, z, r, moments,
+                                       fields, spectral_objects, advect_objects,
+                                       composition, collisions, geometry, gyroavs,
+                                       boundary_distributions, external_source_settings,
+                                       num_diss_params, nl_solver_params, advance,
+                                       advance_implicit, fp_arrays, scratch_dummy,
+                                       manufactured_source_list, diagnostic_checks,
+                                       t_params.step_counter[])
         end
         # update the time
         t += t_params.previous_dt[]
@@ -1042,7 +1353,18 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
             finish_now = true
         end
 
-        if t ≥ t_params.moments_output_times[moments_output_counter] - epsilon
+        if t_params.adaptive && !t_params.write_after_fixed_step_count
+            write_moments = (t ≥ t_params.moments_output_times[moments_output_counter] - epsilon
+                             || t ≥ t_params.end_time - epsilon)
+            write_dfns = (t ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon
+                          || t ≥ t_params.end_time - epsilon)
+        else
+            write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0
+                             || t_params.step_counter[] >= t_params.nstep)
+            write_dfns = (t_params.step_counter[] % t_params.nwrite_dfns == 0
+                          || t_params.step_counter[] >= t_params.nstep)
+        end
+        if write_moments
             moments_output_counter += 1
             if moments_output_counter ≤ length(t_params.moments_output_times)
                 @serial_region begin
@@ -1052,10 +1374,8 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
                 end
             end
             write_moments = true
-        else
-            write_moments = false
         end
-        if t ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon
+        if write_dfns
             dfns_output_counter += 1
             if dfns_output_counter ≤ length(t_params.dfns_output_times)
                 @serial_region begin
@@ -1065,14 +1385,9 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
                 end
             end
             write_dfns = true
-        else
-            write_dfns = false
         end
 
         if write_moments || write_dfns || finish_now
-            # update the diagnostic chodura condition
-            update_chodura!(moments,scratch[end].pdf,vpa,vperp,z,r,spectral_objects.r_spectral,composition,geometry,scratch_dummy,advect_objects.z_advect)
-
             # Always synchronise here, regardless of if we changed region or not
             begin_serial_region(no_synchronize=true)
             _block_synchronize()
@@ -1103,6 +1418,10 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
                 finish_now = true
             end
 
+            # Do MPI communication to add up counters from different processes, where
+            # necessary.
+            gather_nonlinear_solver_counters!(nl_solver_params)
+
             time_for_run = to_minutes(now() - start_time)
         end
         # write moments data to file
@@ -1132,7 +1451,8 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
             write_all_moments_data_to_binary(moments, fields, t,
                                              composition.n_ion_species,
                                              composition.n_neutral_species, io_moments,
-                                             iwrite_moments, time_for_run, t_params, r, z)
+                                             iwrite_moments, time_for_run, t_params,
+                                             nl_solver_params, r, z)
 
             if t_params.steady_state_residual
                 # Calculate some residuals to see how close simulation is to steady state
@@ -1215,8 +1535,9 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr
             write_all_dfns_data_to_binary(pdf, moments, fields, t,
                                           composition.n_ion_species,
                                           composition.n_neutral_species, io_dfns,
-                                          iwrite_dfns, time_for_run, t_params, r, z,
-                                          vperp, vpa, vzeta, vr, vz)
+                                          iwrite_dfns, time_for_run, t_params,
+                                          nl_solver_params, r, z, vperp, vpa, vzeta, vr,
+                                          vz)
             iwrite_dfns += 1
             begin_s_r_z_vperp_region()
             @debug_detect_redundant_block_synchronize begin
@@ -1245,9 +1566,11 @@ end
 
 """
 """
-function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z,
-    vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
-    composition, collisions, external_source_settings, num_diss_params, advance, istep)
+function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_params, vpa,
+                                       z, vpa_spectral, z_spectral, moments, fields,
+                                       vpa_advect, z_advect, composition, collisions,
+                                       external_source_settings, num_diss_params,
+                                       nl_solver_params, advance, advance_implicit, istep)
 
     # define some abbreviated variables for tidiness
     n_ion_species = composition.n_ion_species
@@ -1260,155 +1583,179 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z,
         # advance the operator-split 1D advection equation in vpa
         # vpa-advection only applies for ion species
         advance.vpa_advection = true
-        time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+        time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
             vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
-            composition, collisions, external_source_settings, num_diss_params, advance,
-            istep)
+            composition, collisions, external_source_settings, num_diss_params,
+            nl_solver_params, advance, advance_implicit, istep)
         advance.vpa_advection = false
         # z_advection! advances the operator-split 1D advection equation in z
         # apply z-advection operation to all species (ion and neutral)
         advance.z_advection = true
-        time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+        time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
             vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
-            composition, collisions, external_source_settings, num_diss_params, advance,
-            istep)
+            composition, collisions, external_source_settings, num_diss_params,
+            nl_solver_params, advance, advance_implicit, istep)
         advance.z_advection = false
         # account for charge exchange collisions between ions and neutrals
         if composition.n_neutral_species > 0
             if collisions.charge_exchange > 0.0
-                advance.cx_collisions = true
-                time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+                advance.ion_cx_collisions = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                     vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                     composition, collisions, external_source_settings, num_diss_params,
-                    advance, istep)
-                advance.cx_collisions = false
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.ion_cx_collisions = false
+                advance.neutral_cx_collisions = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
+                    vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
+                    composition, collisions, external_source_settings, num_diss_params,
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.neutral_cx_collisions = false
             end
             if collisions.ionization > 0.0
-                advance.ionization_collisions = true
-                time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa,
+                advance.ion_ionization_collisions = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa,
                     z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect,
                     composition, collisions, external_source_settings, num_diss_params,
-                    advance, istep)
-                advance.ionization_collisions = false
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.ion_ionization_collisions = false
+                advance.neutral_ionization_collisions = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa,
+                    z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect,
+                    composition, collisions, external_source_settings, num_diss_params,
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.neutral_ionization_collisions = false
             end
         end
         if collisions.krook.nuii0  > 0.0
             advance.krook_collisions_ii = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa,
                 z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect,
                 z_SL, vpa_SL, composition, collisions, sources, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.krook_collisions_ii = false
         end
         # and add the source terms associated with redefining g = pdf/density or pdf*vth/density
         # to the kinetic equation
         if moments.evolve_density || moments.evolve_upar || moments.evolve_ppar
             advance.source_terms = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.source_terms = false
         end
         # use the continuity equation to update the density
         if moments.evolve_density
             advance.continuity = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.continuity = false
         end
         # use force balance to update the parallel flow
         if moments.evolve_upar
             advance.force_balance = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.force_balance = false
         end
         # use the energy equation to update the parallel pressure
         if moments.evolve_ppar
             advance.energy = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.energy = false
         end
     else
         # use the energy equation to update the parallel pressure
         if moments.evolve_ppar
             advance.energy = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.energy = false
         end
         # use force balance to update the parallel flow
         if moments.evolve_upar
             advance.force_balance = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.force_balance = false
         end
         # use the continuity equation to update the density
         if moments.evolve_density
             advance.continuity = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.continuity = false
         end
         # and add the source terms associated with redefining g = pdf/density or pdf*vth/density
         # to the kinetic equation
         if moments.evolve_density || moments.evolve_upar || moments.evolve_ppar
             advance.source_terms = true
-            time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+            time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                 vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                 composition, collisions, external_source_settings, num_diss_params,
-                advance, istep)
+                nl_solver_params, advance, advance_implicit, istep)
             advance.source_terms = false
         end
         # account for charge exchange collisions between ions and neutrals
         if composition.n_neutral_species > 0
             if collisions.ionization > 0.0
-                advance.ionization = true
-                time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa,
+                advance.neutral_ionization = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa,
+                    z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect,
+                    composition, collisions, external_source_settings, num_diss_params,
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.neutral_ionization = false
+                advance.ion_ionization = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa,
                     z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect,
                     composition, collisions, external_source_settings, num_diss_params,
-                    advance, istep)
-                advance.ionization = false
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.ion_ionization = false
             end
             if collisions.charge_exchange > 0.0
-                advance.cx_collisions = true
-                time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+                advance.neutral_cx_collisions = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
+                    vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
+                    composition, collisions, external_source_settings, num_diss_params,
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.neutral_cx_collisions = false
+                advance.ion_cx_collisions = true
+                time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
                     vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
                     composition, collisions, external_source_settings, num_diss_params,
-                    advance, istep)
-                advance.cx_collisions = false
+                    nl_solver_params, advance, advance_implicit, istep)
+                advance.ion_cx_collisions = false
             end
         end
         # z_advection! advances the operator-split 1D advection equation in z
         # apply z-advection operation to all species (ion and neutral)
         advance.z_advection = true
-        time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+        time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
             vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
-            composition, collisions, external_source_settings, num_diss_params, advance,
-            istep)
+            composition, collisions, external_source_settings, num_diss_params,
+            nl_solver_params, advance, advance_implicit, istep)
         advance.z_advection = false
         # advance the operator-split 1D advection equation in vpa
         # vpa-advection only applies for ion species
         advance.vpa_advection = true
-        time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z,
+        time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z,
             vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect,
-            composition, collisions, external_source_settings, num_diss_params, advance,
-            istep)
+            composition, collisions, external_source_settings, num_diss_params,
+            nl_solver_params, advance, advance_implicit, istep)
         advance.vpa_advection = false
     end
     return nothing
@@ -1416,217 +1763,193 @@ end
 
 """
 """
-function time_advance_no_splitting!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
-           moments, fields, spectral_objects, advect_objects,
-           composition, collisions, geometry, gyroavs, boundary_distributions,
-           external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy,
-           manufactured_source_list, diagnostic_checks, istep)
-
-    ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
-        moments, fields, spectral_objects, advect_objects, composition, collisions,
-        geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params,
-        advance, fp_arrays, scratch_dummy, manufactured_source_list, diagnostic_checks, istep)
+function time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vz, vr,
+                                    vzeta, vpa, vperp, gyrophase, z, r, moments, fields,
+                                    spectral_objects, advect_objects, composition,
+                                    collisions, geometry, gyroavs, boundary_distributions,
+                                    external_source_settings, num_diss_params,
+                                    nl_solver_params, advance, advance_implicit,
+                                    fp_arrays, scratch_dummy, manufactured_source_list,
+                                    diagnostic_checks, istep)
+
+    ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp,
+            gyrophase, z, r, moments, fields, spectral_objects, advect_objects,
+            composition, collisions, geometry, gyroavs, boundary_distributions,
+            external_source_settings, num_diss_params, nl_solver_params, advance,
+            advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list,
+            diagnostic_checks, istep)
 
     return nothing
 end
 
 """
-use information obtained from the Runge-Kutta stages to compute the updated pdf;
-for the quantities (density, upar, ppar, vth, qpar and phi) that are derived
-from the 'true', un-modified pdf, either: update them using info from Runge Kutta
-stages, if the quantities are evolved separately from the modified pdf;
-or update them by taking the appropriate velocity moment of the evolved pdf
+Use the result of the forward-Euler timestep and the previous Runge-Kutta stages to
+compute the updated pdfs, and any evolved moments.
 """
-function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr, vzeta,
-                    vpa, vperp, z, r, spectral_objects, advect_objects, t, t_params,
-                    istage, composition, collisions, geometry, external_source_settings,
-                    gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments,
-                    istep)
+function rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition)
     begin_s_r_z_region()
 
     new_scratch = scratch[istage+1]
     old_scratch = scratch[istage]
     rk_coefs = t_params.rk_coefs[:,istage]
 
-    z_spectral, r_spectral, vpa_spectral, vperp_spectral = spectral_objects.z_spectral, spectral_objects.r_spectral, spectral_objects.vpa_spectral, spectral_objects.vperp_spectral
-    vzeta_spectral, vr_spectral, vz_spectral = spectral_objects.vzeta_spectral, spectral_objects.vr_spectral, spectral_objects.vz_spectral
-    vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect
-    neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect
-
     ##
     # update the ion distribution and moments
     ##
     # here we seem to have duplicate arrays for storing n, u||, p||, etc, but not for vth
     # 'scratch' is for the multiple stages of time advanced quantities, but 'moments' can be updated directly at each stage
-    rk_update_variable!(scratch, :pdf, t_params, istage)
+    rk_update_variable!(scratch, scratch_implicit, :pdf, t_params, istage)
     # use Runge Kutta to update any velocity moments evolved separately from the pdf
-    rk_update_evolved_moments!(scratch, moments, t_params, istage)
-
-    # Ensure there are no negative values in the pdf before applying boundary
-    # conditions, so that negative deviations do not mess up the integral-constraint
-    # corrections in the sheath boundary conditions.
-    force_minimum_pdf_value!(new_scratch.pdf, num_diss_params.ion.force_minimum_pdf_value)
-
-    # Enforce boundary conditions in z and vpa on the distribution function.
-    # Must be done after Runge Kutta update so that the boundary condition applied to
-    # the updated pdf is consistent with the updated moments - otherwise different upar
-    # between 'pdf', 'old_scratch' and 'new_scratch' might mean a point that should be
-    # set to zero at the sheath boundary according to the final upar has a non-zero
-    # contribution from one or more of the terms.
-    # NB: probably need to do the same for the evolved moments
-    enforce_boundary_conditions!(new_scratch, moments,
-        boundary_distributions.pdf_rboundary_ion, vpa.bc, z.bc, r.bc, vpa, vperp, z,
-        r, vpa_spectral, vperp_spectral, 
-        vpa_advect, vperp_advect, z_advect, r_advect, composition, scratch_dummy,
-        advance.r_diffusion, advance.vpa_diffusion, advance.vperp_diffusion)
-
-    if moments.evolve_density && moments.enforce_conservation
-        begin_s_r_z_region()
-        A = moments.ion.constraints_A_coefficient
-        B = moments.ion.constraints_B_coefficient
-        C = moments.ion.constraints_C_coefficient
-        @loop_s_r_z is ir iz begin
-            (A[iz,ir,is], B[iz,ir,is], C[iz,ir,is]) =
-                @views hard_force_moment_constraints!(new_scratch.pdf[:,:,iz,ir,is],
-                                                     moments, vpa)
-        end
-    end
-
-    function update_derived_ion_moments_and_derivatives()
-        # update remaining velocity moments that are calculable from the evolved pdf
-        # Note these may be needed for the boundary condition on the neutrals, so must be
-        # calculated before that is applied. Also may be needed to calculate advection speeds
-        # for for CFL stability limit calculations in adaptive_timestep_update!().
-        update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition,
-            r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments)
-
-        calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z, z_spectral,
-                                          num_diss_params.ion.moment_dissipation_coefficient)
-    end
-    update_derived_ion_moments_and_derivatives()
+    rk_update_evolved_moments!(scratch, scratch_implicit, moments, t_params, istage)
 
     if composition.n_neutral_species > 0
         ##
         # update the neutral particle distribution and moments
         ##
-        rk_update_variable!(scratch, :pdf_neutral, t_params, istage; neutrals=true)
+        rk_update_variable!(scratch, scratch_implicit, :pdf_neutral, t_params, istage; neutrals=true)
         # use Runge Kutta to update any velocity moments evolved separately from the pdf
-        rk_update_evolved_moments_neutral!(scratch, moments, t_params, istage)
+        rk_update_evolved_moments_neutral!(scratch, scratch_implicit, moments, t_params, istage)
+    end
+end
+
+"""
+Apply boundary conditions and moment constraints to updated pdfs and calculate derived
+moments and moment derivatives
+"""
+function apply_all_bcs_constraints_update_moments!(
+        this_scratch, moments, fields, boundary_distributions, vz, vr, vzeta, vpa, vperp,
+        z, r, spectral_objects, advect_objects, composition, geometry, gyroavs,
+        num_diss_params, advance, scratch_dummy, diagnostic_moments; pdf_bc_constraints=true)
+
+    begin_s_r_z_region()
+
+    z_spectral, r_spectral, vpa_spectral, vperp_spectral = spectral_objects.z_spectral, spectral_objects.r_spectral, spectral_objects.vpa_spectral, spectral_objects.vperp_spectral
+    vzeta_spectral, vr_spectral, vz_spectral = spectral_objects.vzeta_spectral, spectral_objects.vr_spectral, spectral_objects.vz_spectral
+    vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect
+    neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect
 
+    if pdf_bc_constraints
         # Ensure there are no negative values in the pdf before applying boundary
         # conditions, so that negative deviations do not mess up the integral-constraint
         # corrections in the sheath boundary conditions.
-        force_minimum_pdf_value_neutral!(new_scratch.pdf_neutral, num_diss_params.neutral.force_minimum_pdf_value)
+        force_minimum_pdf_value!(this_scratch.pdf, num_diss_params.ion.force_minimum_pdf_value)
 
         # Enforce boundary conditions in z and vpa on the distribution function.
-        # Must be done after Runge Kutta update so that the boundary condition applied to
-        # the updated pdf is consistent with the updated moments - otherwise different upar
-        # between 'pdf', 'old_scratch' and 'new_scratch' might mean a point that should be
-        # set to zero at the sheath boundary according to the final upar has a non-zero
-        # contribution from one or more of the terms.
-        # NB: probably need to do the same for the evolved moments
-        # Note, so far vr and vzeta do not need advect objects, so pass `nothing` for
-        # those as a placeholder
-        enforce_neutral_boundary_conditions!(new_scratch.pdf_neutral, new_scratch.pdf,
-            boundary_distributions, new_scratch.density_neutral, new_scratch.uz_neutral,
-            new_scratch.pz_neutral, moments, new_scratch.density, new_scratch.upar,
-            fields.Er, vzeta_spectral, vr_spectral, vz_spectral, neutral_r_advect,
-            neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr, vz,
-            composition, geometry, scratch_dummy, advance.r_diffusion,
-            advance.vz_diffusion)
+        # Must be done after Runge Kutta update so that the boundary condition applied to the
+        # updated pdf is consistent with the updated moments - otherwise different upar
+        # between 'pdf', 'scratch[istage]' and 'scratch[istage+1]' might mean a point that
+        # should be set to zero at the sheath boundary according to the final upar has a
+        # non-zero contribution from one or more of the terms.  NB: probably need to do the
+        # same for the evolved moments
+        enforce_boundary_conditions!(this_scratch, moments,
+            boundary_distributions.pdf_rboundary_ion, vpa.bc, z.bc, r.bc, vpa, vperp, z, r,
+            vpa_spectral, vperp_spectral, vpa_advect, vperp_advect, z_advect, r_advect,
+            composition, scratch_dummy, advance.r_diffusion, advance.vpa_diffusion,
+            advance.vperp_diffusion)
 
         if moments.evolve_density && moments.enforce_conservation
-            begin_sn_r_z_region()
-            A = moments.neutral.constraints_A_coefficient
-            B = moments.neutral.constraints_B_coefficient
-            C = moments.neutral.constraints_C_coefficient
-            @loop_sn_r_z isn ir iz begin
-                (A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn]) =
-                    @views hard_force_moment_constraints_neutral!(
-                        new_scratch.pdf_neutral[:,:,:,iz,ir,isn], moments, vz)
+            begin_s_r_z_region()
+            A = moments.ion.constraints_A_coefficient
+            B = moments.ion.constraints_B_coefficient
+            C = moments.ion.constraints_C_coefficient
+            @loop_s_r_z is ir iz begin
+                (A[iz,ir,is], B[iz,ir,is], C[iz,ir,is]) =
+                    @views hard_force_moment_constraints!(this_scratch.pdf[:,:,iz,ir,is],
+                                                          moments, vpa)
             end
         end
+    end
 
-        function update_derived_neutral_moments_and_derivatives()
-            # update remaining velocity moments that are calculable from the evolved pdf
-            update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r,
-                                            composition)
-            # update the thermal speed
-            begin_sn_r_z_region()
-            @loop_sn_r_z isn ir iz begin
-                moments.neutral.vth[iz,ir,isn] = sqrt(2.0*new_scratch.pz_neutral[iz,ir,isn]/new_scratch.density_neutral[iz,ir,isn])
-            end
-
-            # update the parallel heat flux
-            update_neutral_qz!(moments.neutral.qz, moments.neutral.qz_updated,
-                               new_scratch.density_neutral, new_scratch.uz_neutral,
-                               moments.neutral.vth, new_scratch.pdf_neutral, vz, vr, vzeta, z,
-                               r, composition, moments.evolve_density, moments.evolve_upar,
-                               moments.evolve_ppar)
+    # update remaining velocity moments that are calculable from the evolved pdf
+    # Note these may be needed for the boundary condition on the neutrals, so must be
+    # calculated before that is applied. Also may be needed to calculate advection speeds
+    # for CFL stability limit calculations in adaptive_timestep_update!().
+    update_derived_moments!(this_scratch, moments, vpa, vperp, z, r, composition,
+        r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments)
 
-            calculate_neutral_moment_derivatives!(moments, new_scratch, scratch_dummy, z,
-                                                  z_spectral,
-                                                  num_diss_params.neutral.moment_dissipation_coefficient)
-        end
-        update_derived_neutral_moments_and_derivatives()
-    end
+    calculate_ion_moment_derivatives!(moments, this_scratch, scratch_dummy, z, z_spectral,
+                                      num_diss_params.ion.moment_dissipation_coefficient)
 
     # update the electrostatic potential phi
-    update_phi!(fields, scratch[istage+1], vperp, z, r, composition, z_spectral,
-                r_spectral, scratch_dummy, gyroavs)
-    # _block_synchronize() here because phi needs to be read on different ranks than
-    # it was written on, even though the loop-type does not change here. However,
-    # after the final RK stage can skip if:
-    #  * evolving upar or ppar as synchronization will be triggered after moments
-    #    updates at the beginning of the next RK step
-    _block_synchronize()
+    update_phi!(fields, this_scratch, vperp, z, r, composition, z_spectral, r_spectral,
+                scratch_dummy, gyroavs)
 
-    if t_params.adaptive && istage == t_params.n_rk_stages
-        # Note the timestep update must be done before calculating derived moments and
-        # moment derivatives, because the timstep might need to be re-done with a smaller
-        # dt, in which case scratch[t_params.n_rk_stages+1] will be reset to the values
-        # from the beginning of the timestep here.
-        adaptive_timestep_update!(scratch, t, t_params, moments, fields, composition,
-                                  collisions, geometry, external_source_settings,
-                                  advect_objects, r, z, vperp, vpa, vzeta, vr, vz)
-        # Re-do this in case adaptive_timestep_update re-arranged the `scratch` vector
-        new_scratch = scratch[istage+1]
-        old_scratch = scratch[istage]
-
-        if t_params.previous_dt[] == 0.0
-            # Re-update remaining velocity moments that are calculable from the evolved
-            # pdf These need to be re-calculated because `new_scratch` was swapped with
-            # the beginning of the timestep, because the timestep failed
-            update_derived_ion_moments_and_derivatives()
-            if composition.n_neutral_species > 0
-                update_derived_neutral_moments_and_derivatives()
-            end
+    if composition.n_neutral_species > 0
+        if pdf_bc_constraints
+            # Ensure there are no negative values in the pdf before applying boundary
+            # conditions, so that negative deviations do not mess up the integral-constraint
+            # corrections in the sheath boundary conditions.
+            force_minimum_pdf_value_neutral!(this_scratch.pdf_neutral,
+                                             num_diss_params.neutral.force_minimum_pdf_value)
+
+            # Enforce boundary conditions in z and vpa on the distribution function.
+            # Must be done after Runge Kutta update so that the boundary condition applied to
+            # the updated pdf is consistent with the updated moments - otherwise different
+            # upar between 'pdf', 'scratch[istage]' and 'scratch[istage+1]' might mean a point
+            # that should be set to zero at the sheath boundary according to the final upar
+            # has a non-zero contribution from one or more of the terms.  NB: probably need to
+            # do the same for the evolved moments.  Note, so far vr and vzeta do not need
+            # advect objects, so pass `nothing` for those as a placeholder
+            enforce_neutral_boundary_conditions!(this_scratch.pdf_neutral, this_scratch.pdf,
+                boundary_distributions, this_scratch.density_neutral, this_scratch.uz_neutral,
+                this_scratch.pz_neutral, moments, this_scratch.density, this_scratch.upar,
+                fields.Er, vzeta_spectral, vr_spectral, vz_spectral, neutral_r_advect,
+                neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr, vz,
+                composition, geometry, scratch_dummy, advance.r_diffusion,
+                advance.vz_diffusion)
 
-            # update the electrostatic potential phi
-            update_phi!(fields, scratch[istage+1], vperp, z, r, composition, z_spectral,
-                        r_spectral, scratch_dummy, gyroavs)
-            if !(( moments.evolve_upar || moments.evolve_ppar) &&
-                      istage == length(scratch)-1)
-                # _block_synchronize() here because phi needs to be read on different ranks than
-                # it was written on, even though the loop-type does not change here. However,
-                # after the final RK stage can skip if:
-                #  * evolving upar or ppar as synchronization will be triggered after moments
-                #    updates at the beginning of the next RK step
-                _block_synchronize()
+            if moments.evolve_density && moments.enforce_conservation
+                begin_sn_r_z_region()
+                A = moments.neutral.constraints_A_coefficient
+                B = moments.neutral.constraints_B_coefficient
+                C = moments.neutral.constraints_C_coefficient
+                @loop_sn_r_z isn ir iz begin
+                    (A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn]) =
+                        @views hard_force_moment_constraints_neutral!(
+                            this_scratch.pdf_neutral[:,:,:,iz,ir,isn], moments, vz)
+                end
             end
         end
+
+        # update remaining velocity moments that are calculable from the evolved pdf
+        update_derived_moments_neutral!(this_scratch, moments, vz, vr, vzeta, z, r,
+                                        composition)
+        # update the thermal speed
+        begin_sn_r_z_region()
+        @loop_sn_r_z isn ir iz begin
+            moments.neutral.vth[iz,ir,isn] = sqrt(2.0*this_scratch.pz_neutral[iz,ir,isn]/this_scratch.density_neutral[iz,ir,isn])
+        end
+
+        # update the parallel heat flux
+        update_neutral_qz!(moments.neutral.qz, moments.neutral.qz_updated,
+                           this_scratch.density_neutral, this_scratch.uz_neutral,
+                           moments.neutral.vth, this_scratch.pdf_neutral, vz, vr, vzeta, z,
+                           r, composition, moments.evolve_density, moments.evolve_upar,
+                           moments.evolve_ppar)
+
+        calculate_neutral_moment_derivatives!(moments, this_scratch, scratch_dummy, z,
+                                              z_spectral,
+                                              num_diss_params.neutral.moment_dissipation_coefficient)
     end
 end
 
 """
-    adaptive_timestep_update!(scratch, t_params, rk_coefs, moments, n_neutral_species)
+    adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments,
+                              fields, boundary_distributions, composition,
+                              collisions, geometry, external_source_settings,
+                              spectral_objects, advect_objects, gyroavs,
+                              num_diss_params, advance, scratch_dummy, r, z, vperp,
+                              vpa, vzeta, vr, vz, success, nl_max_its_fraction)
 
 Check the error estimate for the embedded RK method and adjust the timestep if
 appropriate.
 """
-function adaptive_timestep_update!(scratch, t, t_params, moments, fields, composition,
+function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments,
+                                   fields, boundary_distributions, composition,
                                    collisions, geometry, external_source_settings,
-                                   advect_objects, r, z, vperp, vpa, vzeta, vr, vz)
+                                   spectral_objects, advect_objects, gyroavs,
+                                   num_diss_params, advance, scratch_dummy, r, z, vperp,
+                                   vpa, vzeta, vr, vz, success, nl_max_its_fraction)
     #error_norm_method = "Linf"
     error_norm_method = "L2"
 
@@ -1662,30 +1985,34 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
     # reduction over the shared-memory block, so all processes must calculate the same
     # species at the same time.
     begin_r_vperp_vpa_region(; no_synchronize=true)
-    ion_z_CFL = Inf
-    @loop_s is begin
-        update_speed_z!(z_advect[is], moments.ion.upar, moments.ion.vth, evolve_upar,
-                        evolve_ppar, fields, vpa, vperp, z, r, t, geometry, is)
-        this_minimum = get_minimum_CFL_z(z_advect[is].speed, z)
-        @serial_region begin
-            ion_z_CFL = min(ion_z_CFL, this_minimum)
+    if !t_params.implicit_ion_advance
+        ion_z_CFL = Inf
+        @loop_s is begin
+            update_speed_z!(z_advect[is], moments.ion.upar, moments.ion.vth, evolve_upar,
+                            evolve_ppar, fields, vpa, vperp, z, r, t, geometry, is)
+            this_minimum = get_minimum_CFL_z(z_advect[is].speed, z)
+            @serial_region begin
+                ion_z_CFL = min(ion_z_CFL, this_minimum)
+            end
         end
+        push!(CFL_limits, t_params.CFL_prefactor * ion_z_CFL)
     end
-    push!(CFL_limits, t_params.CFL_prefactor * ion_z_CFL)
 
-    # ion vpa-advection
-    begin_r_z_vperp_region()
-    ion_vpa_CFL = Inf
-    update_speed_vpa!(vpa_advect, fields, scratch[end], moments, vpa, vperp, z, r,
-                      composition, collisions, external_source_settings.ion, t,
-                      geometry)
-    @loop_s is begin
-        this_minimum = get_minimum_CFL_vpa(vpa_advect[is].speed, vpa)
-        @serial_region begin
-            ion_vpa_CFL = min(ion_vpa_CFL, this_minimum)
+    if !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection)
+        # ion vpa-advection
+        begin_r_z_vperp_region()
+        ion_vpa_CFL = Inf
+        update_speed_vpa!(vpa_advect, fields, scratch[end], moments, vpa, vperp, z, r,
+                          composition, collisions, external_source_settings.ion, t,
+                          geometry)
+        @loop_s is begin
+            this_minimum = get_minimum_CFL_vpa(vpa_advect[is].speed, vpa)
+            @serial_region begin
+                ion_vpa_CFL = min(ion_vpa_CFL, this_minimum)
+            end
         end
+        push!(CFL_limits, t_params.CFL_prefactor * ion_vpa_CFL)
     end
-    push!(CFL_limits, t_params.CFL_prefactor * ion_vpa_CFL)
 
     # To avoid double counting points when we use distributed-memory MPI, skip the
     # inner/lower point in r and z if this process is not the first block in that
@@ -1693,12 +2020,56 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
     skip_r_inner = r.irank != 0
     skip_z_lower = z.irank != 0
 
-    # Calculate error for ion distribution functions
-    # Note rk_error_variable!() stores the calculated error in `scratch[2]`.
-    rk_error_variable!(scratch, :pdf, t_params)
-    ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[end].pdf, t_params.rtol,
-                                     t_params.atol; method=error_norm_method,
-                                     skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower,
+    # Calculate low-order approximations, from which the timestep error can be estimated.
+    # Note we store the calculated low-order approximation in `scratch[2]`.
+    rk_loworder_solution!(scratch, scratch_implicit, :pdf, t_params)
+    if moments.evolve_density
+        begin_s_r_z_region()
+        rk_loworder_solution!(scratch, scratch_implicit, :density, t_params)
+    end
+    if moments.evolve_upar
+        begin_s_r_z_region()
+        rk_loworder_solution!(scratch, scratch_implicit, :upar, t_params)
+    end
+    if moments.evolve_ppar
+        begin_s_r_z_region()
+        rk_loworder_solution!(scratch, scratch_implicit, :ppar, t_params)
+    end
+    if n_neutral_species > 0
+        begin_sn_r_z_vzeta_vr_region()
+        rk_loworder_solution!(scratch, scratch_implicit, :pdf_neutral, t_params; neutrals=true)
+        if moments.evolve_density
+            begin_sn_r_z_region()
+            rk_loworder_solution!(scratch, scratch_implicit, :density_neutral, t_params; neutrals=true)
+        end
+        if moments.evolve_upar
+            begin_sn_r_z_region()
+            rk_loworder_solution!(scratch, scratch_implicit, :uz_neutral, t_params; neutrals=true)
+        end
+        if moments.evolve_ppar
+            begin_sn_r_z_region()
+            rk_loworder_solution!(scratch, scratch_implicit, :pz_neutral, t_params; neutrals=true)
+        end
+    end
+
+    # Apply boundary conditions and constraints
+    apply_all_bcs_constraints_update_moments!(
+        scratch[2], moments, fields, boundary_distributions, vz, vr, vzeta,
+        vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry,
+        gyroavs, num_diss_params, advance, scratch_dummy, false)
+
+    # Re-calculate moment derivatives in the `moments` struct, in case they were changed
+    # by the previous call
+    apply_all_bcs_constraints_update_moments!(
+        scratch[t_params.n_rk_stages+1], moments, fields, boundary_distributions, vz, vr,
+        vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry,
+        gyroavs, num_diss_params, advance, scratch_dummy, false; pdf_bc_constraints=false)
+
+    # Calculate the timestep error estimates
+    ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[t_params.n_rk_stages+1].pdf,
+                                     t_params.rtol, t_params.atol;
+                                     method=error_norm_method, skip_r_inner=skip_r_inner,
+                                     skip_z_lower=skip_z_lower,
                                      error_sum_zero=t_params.error_sum_zero)
     push!(error_norms, ion_pdf_error)
     push!(total_points,
@@ -1707,8 +2078,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
     # Calculate error for ion moments, if necessary
     if moments.evolve_density
         begin_s_r_z_region()
-        rk_error_variable!(scratch, :density, t_params)
-        ion_n_err = local_error_norm(scratch[2].density, scratch[end].density,
+        ion_n_err = local_error_norm(scratch[2].density,
+                                     scratch[t_params.n_rk_stages+1].density,
                                      t_params.rtol, t_params.atol;
                                      method=error_norm_method, skip_r_inner=skip_r_inner,
                                      skip_z_lower=skip_z_lower,
@@ -1718,8 +2089,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
     end
     if moments.evolve_upar
         begin_s_r_z_region()
-        rk_error_variable!(scratch, :upar, t_params)
-        ion_u_err = local_error_norm(scratch[2].upar, scratch[end].upar, t_params.rtol,
+        ion_u_err = local_error_norm(scratch[2].upar,
+                                     scratch[t_params.n_rk_stages+1].upar, t_params.rtol,
                                      t_params.atol; method=error_norm_method,
                                      skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower,
                                      error_sum_zero=t_params.error_sum_zero)
@@ -1728,8 +2099,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
     end
     if moments.evolve_ppar
         begin_s_r_z_region()
-        rk_error_variable!(scratch, :ppar, t_params)
-        ion_p_err = local_error_norm(scratch[2].ppar, scratch[end].ppar, t_params.rtol,
+        ion_p_err = local_error_norm(scratch[2].ppar,
+                                     scratch[t_params.n_rk_stages+1].ppar, t_params.rtol,
                                      t_params.atol; method=error_norm_method,
                                      skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower,
                                      error_sum_zero=t_params.error_sum_zero)
@@ -1770,7 +2141,6 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
         push!(CFL_limits, t_params.CFL_prefactor * neutral_vz_CFL)
 
         # Calculate error for neutral distribution functions
-        rk_error_variable!(scratch, :pdf_neutral, t_params; neutrals=true)
         neut_pdf_error = local_error_norm(scratch[2].pdf_neutral,
                                           scratch[end].pdf_neutral, t_params.rtol,
                                           t_params.atol; method=error_norm_method,
@@ -1785,7 +2155,6 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
         # Calculate error for neutral moments, if necessary
         if moments.evolve_density
             begin_sn_r_z_region()
-            rk_error_variable!(scratch, :density_neutral, t_params; neutrals=true)
             neut_n_err = local_error_norm(scratch[2].density_neutral,
                                           scratch[end].density_neutral, t_params.rtol,
                                           t_params.atol, true; method=error_norm_method,
@@ -1797,8 +2166,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
         end
         if moments.evolve_upar
             begin_sn_r_z_region()
-            rk_error_variable!(scratch, :uz_neutral, t_params; neutrals=true)
-            neut_u_err = local_error_norm(scratch[2].uz_neutral, scratch[end].uz_neutral,
+            neut_u_err = local_error_norm(scratch[2].uz_neutral,
+                                          scratch[t_params.n_rk_stages+1].uz_neutral,
                                           t_params.rtol, t_params.atol, true;
                                           method=error_norm_method,
                                           skip_r_inner=skip_r_inner,
@@ -1809,8 +2178,8 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
         end
         if moments.evolve_ppar
             begin_sn_r_z_region()
-            rk_error_variable!(scratch, :pz_neutral, t_params; neutrals=true)
-            neut_p_err = local_error_norm(scratch[2].pz_neutral, scratch[end].pz_neutral,
+            neut_p_err = local_error_norm(scratch[2].pz_neutral,
+                                          scratch[t_params.n_rk_stages+1].pz_neutral,
                                           t_params.rtol, t_params.atol, true;
                                           method=error_norm_method,
                                           skip_r_inner=skip_r_inner,
@@ -1822,100 +2191,31 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos
     end
 
     adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms,
-                                       total_points, current_dt, error_norm_method)
-
-    return nothing
-end
+                                       total_points, current_dt, error_norm_method,
+                                       success, nl_max_its_fraction)
 
-"""
-update velocity moments that are calculable from the evolved ion pdf
-"""
-function update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition,
-    r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments)
-    
-    if composition.gyrokinetic_ions
-        ff = scratch_dummy.buffer_vpavperpzrs_1
-        # fill buffer with ring-averaged F (gyroaverage at fixed position)
-        gyroaverage_pdf!(ff,new_scratch.pdf,gyroavs,vpa,vperp,z,r,composition)
-    else
-        ff = new_scratch.pdf
+    if t_params.previous_dt[] == 0.0
+        # Re-update remaining velocity moments that are calculable from the evolved
+        # pdf. These need to be re-calculated because `scratch[n_rk_stages+1]` is now
+        # the state at the beginning of the timestep, because the timestep failed.
+        apply_all_bcs_constraints_update_moments!(
+            scratch[t_params.n_rk_stages+1], moments, fields, nothing, vz, vr, vzeta,
+            vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry,
+            gyroavs, num_diss_params, advance, scratch_dummy, false;
+            pdf_bc_constraints=false)
     end
-    
-    if !moments.evolve_density
-        update_density!(new_scratch.density, moments.ion.dens_updated,
-                        ff, vpa, vperp, z, r, composition)
-    end
-    if !moments.evolve_upar
-        update_upar!(new_scratch.upar, moments.ion.upar_updated, new_scratch.density,
-                     new_scratch.ppar, ff, vpa, vperp, z, r, composition,
-                     moments.evolve_density, moments.evolve_ppar)
-    end
-    if !moments.evolve_ppar
-        # update_ppar! calculates (p_parallel/m_s N_e c_s^2) + (n_s/N_e)*(upar_s/c_s)^2 = (1/√π)∫d(vpa/c_s) (vpa/c_s)^2 * (√π f_s c_s / N_e)
-        update_ppar!(new_scratch.ppar, moments.ion.ppar_updated, new_scratch.density,
-                     new_scratch.upar, ff, vpa, vperp, z, r, composition,
-                     moments.evolve_density, moments.evolve_upar)
-    end
-    update_pperp!(new_scratch.pperp, ff, vpa, vperp, z, r, composition)
-    
-    # if diagnostic time step/RK stage
-    # update the diagnostic chodura condition
-    if diagnostic_moments
-        update_chodura!(moments,ff,vpa,vperp,z,r,r_spectral,composition,geometry,scratch_dummy,z_advect)
-    end
-    # update the thermal speed
-    begin_s_r_z_region()
-    try #below block causes DomainError if ppar < 0 or density, so exit cleanly if possible
-        update_vth!(moments.ion.vth, new_scratch.ppar, new_scratch.pperp, new_scratch.density, vperp, z, r, composition)
-    catch e
-        if global_size[] > 1
-            println("ERROR: error calculating vth in time_advance.jl")
-            println(e)
-            display(stacktrace(catch_backtrace()))
-            flush(stdout)
-            flush(stderr)
-            MPI.Abort(comm_world, 1)
-        end
-        rethrow(e)
-    end
-    # update the parallel heat flux
-    update_qpar!(moments.ion.qpar, moments.ion.qpar_updated, new_scratch.density,
-                 new_scratch.upar, moments.ion.vth, ff, vpa, vperp, z, r,
-                 composition, moments.evolve_density, moments.evolve_upar,
-                 moments.evolve_ppar)
-    # add further moments to be computed here
-    
-end
 
-"""
-update velocity moments that are calculable from the evolved neutral pdf
-"""
-function update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r,
-                                         composition)
-    if !moments.evolve_density
-        update_neutral_density!(new_scratch.density_neutral, moments.neutral.dens_updated,
-                                new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition)
-    end
-    if !moments.evolve_upar
-        update_neutral_uz!(new_scratch.uz_neutral, moments.neutral.uz_updated,
-                           new_scratch.density_neutral, new_scratch.pz_neutral,
-                           new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition,
-                           moments.evolve_density, moments.evolve_ppar)
-    end
-    if !moments.evolve_ppar
-        update_neutral_pz!(new_scratch.pz_neutral, moments.neutral.pz_updated,
-                           new_scratch.density_neutral, new_scratch.uz_neutral,
-                           new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition,
-                           moments.evolve_density, moments.evolve_upar)
-    end
+    return nothing
 end
 
 """
 """
-function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
-           moments, fields, spectral_objects, advect_objects, composition, collisions,
-           geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params,
-           advance, fp_arrays, scratch_dummy, manufactured_source_list,diagnostic_checks, istep)
+function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp,
+                 gyrophase, z, r, moments, fields, spectral_objects, advect_objects,
+                 composition, collisions, geometry, gyroavs, boundary_distributions,
+                 external_source_settings, num_diss_params, nl_solver_params, advance,
+                 advance_implicit, fp_arrays, scratch_dummy,
+                 manufactured_source_list,diagnostic_checks, istep)
 
     begin_s_r_z_region()
 
@@ -1950,25 +2250,116 @@ function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase
         _block_synchronize()
     end
 
+    # success is set to false if an iteration failed to converge in an implicit solve
+    success = true
     for istage ∈ 1:n_rk_stages
-        # do an Euler time advance, with scratch[2] containing the advanced quantities
-        # and scratch[1] containing quantities at time level n
-        update_solution_vector!(scratch, moments, istage, composition, vpa, vperp, z, r)
+        if t_params.rk_coefs_implicit !== nothing
+            update_solution_vector!(scratch_implicit[istage], scratch[istage], moments,
+                                    composition, vpa, vperp, z, r)
+            if t_params.implicit_coefficient_is_zero[istage]
+                # No implicit solve needed at this stage. Do an explicit step of the
+                # implicitly-evolved terms so we can store their time-derivative at this
+                # stage.
+                euler_time_advance!(scratch_implicit[istage], scratch[istage],
+                                    pdf, fields, moments, advect_objects, vz, vr, vzeta,
+                                    vpa, vperp, gyrophase, z, r, t, t_params.dt[],
+                                    spectral_objects, composition, collisions, geometry,
+                                    scratch_dummy, manufactured_source_list,
+                                    external_source_settings, num_diss_params,
+                                    advance_implicit, fp_arrays, istage)
+                # The result of the forward-Euler step is just a hack to store the
+                # (explicit) time-derivative of the implicitly advanced terms. The result
+                # is not used as input to the explicit part of the IMEX advance.
+                old_scratch = scratch[istage]
+            else
+                # Backward-Euler step for implicitly-evolved terms.
+                # Note the timestep for this solve is rk_coefs_implicit[istage,istage]*dt.
+                # The diagonal elements are equal to the Butcher 'a' coefficients
+                # rk_coefs_implicit[istage,istage]=a[istage,istage].
+                success = backward_euler!(scratch_implicit[istage], scratch[istage], pdf,
+                                          fields, moments, advect_objects, vz, vr, vzeta,
+                                          vpa, vperp, gyrophase, z, r, t, t_params.dt[] *
+                                          t_params.rk_coefs_implicit[istage,istage],
+                                          spectral_objects, composition, collisions,
+                                          geometry, scratch_dummy,
+                                          manufactured_source_list,
+                                          external_source_settings, num_diss_params,
+                                          gyroavs, nl_solver_params, advance_implicit,
+                                          fp_arrays, istage)
+                success = MPI.Allreduce(success, &, comm_world)
+                if !success
+                    # Break out of the istage loop, as passing `success = false` to the
+                    # adaptive timestep update function will signal a failed timestep, so
+                    # that we restart this timestep with a smaller `dt`.
+                    break
+                end
+                # The result of the implicit solve gives the state vector at 'istage'
+                # which is used as input to the explicit part of the IMEX time step.
+                old_scratch = scratch_implicit[istage]
+                apply_all_bcs_constraints_update_moments!(
+                    scratch_implicit[istage], moments, fields, boundary_distributions, vz,
+                    vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects,
+                    composition, geometry, gyroavs, num_diss_params, advance,
+                    scratch_dummy, false)
+            end
+        else
+            # Fully explicit method starts the forward-Euler step with the result from the
+            # previous stage.
+            old_scratch = scratch[istage]
+        end
+        update_solution_vector!(scratch[istage+1], old_scratch, moments, composition, vpa,
+                                vperp, z, r)
+        # do an Euler time advance, with scratch[istage+1] containing the advanced
+        # quantities and scratch[istage] containing quantities at time level n, RK stage
+        # istage
         # calculate f^{(1)} = fⁿ + Δt*G[fⁿ] = scratch[2].pdf
-        euler_time_advance!(scratch[istage+1], scratch[istage],
-            pdf, fields, moments,
-            advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t,
-            t_params.dt[], spectral_objects, composition,
-            collisions, geometry, scratch_dummy, manufactured_source_list,
-            external_source_settings, num_diss_params, advance, fp_arrays, istage)
+        euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments,
+                            advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z,
+                            r, t, t_params.dt[], spectral_objects, composition,
+                            collisions, geometry, scratch_dummy,
+                            manufactured_source_list, external_source_settings,
+                            num_diss_params, advance, fp_arrays, istage)
+
+        rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition)
+
+        # Always apply boundary conditions and constraints here for explicit schemes. For
+        # IMEX schemes, only apply boundary conditions and constraints at the final RK
+        # stage - for other stages they are imposed after the implicit part of the step.
+        # If `implicit_coefficient_is_zero` is true for the next stage, then this step is
+        # explicit, so we need the bcs and constraints.
+        apply_bc_constraints = (t_params.rk_coefs_implicit === nothing
+                                || istage == n_rk_stages
+                                || t_params.implicit_coefficient_is_zero[istage+1])
         diagnostic_moments = diagnostic_checks && istage == n_rk_stages
-        @views rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr,
-                          vzeta, vpa, vperp, z, r, spectral_objects, advect_objects,
-                          t, t_params, istage, composition, collisions, geometry,
-                          external_source_settings, gyroavs, num_diss_params, advance,
-                          scratch_dummy, diagnostic_moments, istep)
+        apply_all_bcs_constraints_update_moments!(
+            scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta,
+            vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry,
+            gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments;
+            pdf_bc_constraints=apply_bc_constraints)
+    end
+
+    if t_params.adaptive
+        nl_max_its_fraction = 0.0
+        for p ∈ nl_solver_params
+            if p !== nothing
+                nl_max_its_fraction =
+                    max(p.max_nonlinear_iterations_this_step[] / p.nonlinear_max_iterations,
+                        nl_max_its_fraction)
+            end
+        end
+        adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, fields,
+                                  boundary_distributions, composition, collisions,
+                                  geometry, external_source_settings, spectral_objects,
+                                  advect_objects, gyroavs, num_diss_params, advance,
+                                  scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success,
+                                  nl_max_its_fraction)
+    elseif !success
+        error("Implicit part of timestep failed")
     end
 
+    reset_nonlinear_per_stage_counters(nl_solver_params.ion_advance)
+    reset_nonlinear_per_stage_counters(nl_solver_params.vpa_advection)
+
     istage = n_rk_stages+1
 
     # update the pdf.norm and moments arrays as needed
@@ -2126,31 +2517,51 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
         source_terms_manufactured!(fvec_out.pdf, fvec_out.pdf_neutral, vz, vr, vzeta, vpa, vperp, z, r, t, dt, composition, manufactured_source_list)
     end
 
-    if advance.cx_collisions || advance.ionization_collisions
+    if advance.ion_cx_collisions || advance.ion_ionization_collisions
         # gyroaverage neutral dfn and place it in the ion.buffer array for use in the collisions step
         vzvrvzeta_to_vpavperp!(pdf.ion.buffer, fvec_in.pdf_neutral, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, geometry, composition)
+    end
+    if advance.neutral_cx_collisions || advance.neutral_ionization_collisions
         # interpolate ion particle dfn and place it in the neutral.buffer array for use in the collisions step
         vpavperp_to_vzvrvzeta!(pdf.neutral.buffer, fvec_in.pdf, vz, vr, vzeta, vpa, vperp, z, r, geometry, composition)
     end
 
     # account for charge exchange collisions between ions and neutrals
-    if advance.cx_collisions_1V
-        charge_exchange_collisions_1V!(fvec_out.pdf, fvec_out.pdf_neutral, fvec_in,
-                                       moments, composition, vpa, vz,
-                                       collisions.charge_exchange, vpa_spectral,
-                                       vz_spectral, dt)
-    elseif advance.cx_collisions
-        charge_exchange_collisions_3V!(fvec_out.pdf, fvec_out.pdf_neutral, pdf.ion.buffer, pdf.neutral.buffer, fvec_in, composition,
-                                        vz, vr, vzeta, vpa, vperp, z, r, collisions.charge_exchange, dt)
+    if advance.ion_cx_collisions_1V
+        ion_charge_exchange_collisions_1V!(fvec_out.pdf, fvec_in, moments, composition,
+                                           vpa, vz, collisions.charge_exchange,
+                                           vpa_spectral, vz_spectral, dt)
+    elseif advance.ion_cx_collisions
+        ion_charge_exchange_collisions_3V!(fvec_out.pdf, pdf.ion.buffer, fvec_in,
+                                           composition, vz, vr, vzeta, vpa, vperp, z, r,
+                                           collisions.charge_exchange, dt)
+    end
+    if advance.neutral_cx_collisions_1V
+        neutral_charge_exchange_collisions_1V!(fvec_out.pdf_neutral, fvec_in, moments,
+                                               composition, vpa, vz,
+                                               collisions.charge_exchange, vpa_spectral,
+                                               vz_spectral, dt)
+    elseif advance.neutral_cx_collisions
+        neutral_charge_exchange_collisions_3V!(fvec_out.pdf_neutral, pdf.neutral.buffer,
+                                               fvec_in, composition, vz, vr, vzeta, vpa,
+                                               vperp, z, r, collisions.charge_exchange,
+                                               dt)
     end
     # account for ionization collisions between ions and neutrals
-    if advance.ionization_collisions_1V
-        ionization_collisions_1V!(fvec_out.pdf, fvec_out.pdf_neutral, fvec_in, vz, vpa,
-                                  vperp, z, r, vz_spectral, moments, composition,
-                                  collisions, dt)
-    elseif advance.ionization_collisions
-        ionization_collisions_3V!(fvec_out.pdf, fvec_out.pdf_neutral, pdf.ion.buffer, fvec_in, composition,
-                                        vz, vr, vzeta, vpa, vperp, z, r, collisions, dt)
+    if advance.ion_ionization_collisions_1V
+        ion_ionization_collisions_1V!(fvec_out.pdf, fvec_in, vz, vpa, vperp, z, r,
+                                      vz_spectral, moments, composition, collisions, dt)
+    elseif advance.ion_ionization_collisions
+        ion_ionization_collisions_3V!(fvec_out.pdf, pdf.ion.buffer, fvec_in, composition,
+                                      vz, vr, vzeta, vpa, vperp, z, r, collisions, dt)
+    end
+    if advance.neutral_ionization_collisions_1V
+        neutral_ionization_collisions_1V!(fvec_out.pdf_neutral, fvec_in, vz, vpa, vperp,
+                                          z, r, vz_spectral, moments, composition,
+                                          collisions, dt)
+    elseif advance.neutral_ionization_collisions
+        neutral_ionization_collisions_3V!(fvec_out.pdf_neutral, fvec_in, composition, vz,
+                                          vr, vzeta, vpa, vperp, z, r, collisions, dt)
     end
     if advance.ionization_source
         constant_ionization_source!(fvec_out.pdf, fvec_in, vpa, vperp, z, r, moments,
@@ -2173,7 +2584,7 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
     end
 
     # add numerical dissipation
-    if advance.numerical_dissipation
+    if advance.ion_numerical_dissipation
         vpa_dissipation!(fvec_out.pdf, fvec_in.pdf, vpa, vpa_spectral, dt,
                          num_diss_params.ion.vpa_dissipation_coefficient)
         vperp_dissipation!(fvec_out.pdf, fvec_in.pdf, vperp, vperp_spectral, dt,
@@ -2182,6 +2593,8 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
                        num_diss_params.ion.z_dissipation_coefficient, scratch_dummy)
         r_dissipation!(fvec_out.pdf, fvec_in.pdf, r, r_spectral, dt,
                        num_diss_params.ion.r_dissipation_coefficient, scratch_dummy)
+    end
+    if advance.neutral_numerical_dissipation
         vz_dissipation_neutral!(fvec_out.pdf_neutral, fvec_in.pdf_neutral, vz,
                                 vz_spectral, dt, num_diss_params.neutral.vz_dissipation_coefficient)
         z_dissipation_neutral!(fvec_out.pdf_neutral, fvec_in.pdf_neutral, z, z_spectral,
@@ -2261,13 +2674,306 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
     return nothing
 end
 
+"""
+    backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, vz, vr,
+                    vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects,
+                    composition, collisions, geometry, scratch_dummy,
+                    manufactured_source_list, external_source_settings,
+                    num_diss_params, gyroavs, nl_solver_params, advance, fp_arrays,
+                    istage)
+
+Do the implicit part of a timestep. Uses `implicit_ion_advance!` when
+`nl_solver_params.ion_advance` is set, otherwise `implicit_vpa_advection!` when
+`advance.vpa_advection` is set. Returns `false` if the solve failed, else `true`.
+"""
+function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, vz, vr,
+                         vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects,
+                         composition, collisions, geometry, scratch_dummy,
+                         manufactured_source_list, external_source_settings,
+                         num_diss_params, gyroavs, nl_solver_params, advance, fp_arrays,
+                         istage)
+    if nl_solver_params.ion_advance !== nothing
+        return implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments,
+                                     advect_objects, vz, vr, vzeta, vpa, vperp,
+                                     gyrophase, z, r, t, dt, spectral_objects,
+                                     composition, collisions, geometry, scratch_dummy,
+                                     manufactured_source_list, external_source_settings,
+                                     num_diss_params, gyroavs,
+                                     nl_solver_params.ion_advance, advance, fp_arrays,
+                                     istage)
+    elseif advance.vpa_advection
+        return implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments,
+                                       advect_objects.z_advect, advect_objects.vpa_advect,
+                                       vpa, vperp, z, r, dt, t, spectral_objects.r_spectral,
+                                       spectral_objects.z_spectral,
+                                       spectral_objects.vpa_spectral, composition,
+                                       collisions, external_source_settings.ion, geometry,
+                                       nl_solver_params.vpa_advection,
+                                       advance.vpa_diffusion, num_diss_params, gyroavs,
+                                       scratch_dummy)
+    end
+    return true
+end
+
+"""
+    implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects,
+                          vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt,
+                          spectral_objects, composition, collisions, geometry,
+                          scratch_dummy, manufactured_source_list,
+                          external_source_settings, num_diss_params, gyroavs,
+                          nl_solver_params, advance, fp_arrays, istage)
+
+Do a backward-Euler timestep for all terms in the ion kinetic equation.
+
+A forward-Euler step provides the initial guess for `fvec_out.pdf`, which is then
+updated in place by `newton_solve!` so that `f_new - f_old - dt*RHS(f_new) = 0`, with
+the boundary conditions and moment constraints imposed on the residual.
+
+Returns the success flag reported by `newton_solve!`.
+"""
+function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects,
+                               vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt,
+                               spectral_objects, composition, collisions, geometry,
+                               scratch_dummy, manufactured_source_list,
+                               external_source_settings, num_diss_params, gyroavs,
+                               nl_solver_params, advance, fp_arrays, istage)
+
+    vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral
+    vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect
+
+    # Make a copy of fvec_in.pdf so we can apply boundary conditions at the 'new'
+    # timestep, as these are the boundary conditions we need to apply to the residual.
+    f_old = scratch_dummy.implicit_buffer_vpavperpzrs_1
+    begin_s_r_z_vperp_vpa_region()
+    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+        f_old[ivpa,ivperp,iz,ir,is] = fvec_in.pdf[ivpa,ivperp,iz,ir,is]
+    end
+
+    coords = (s=composition.n_ion_species, r=r, z=z, vperp=vperp, vpa=vpa)
+
+    # vpa indices bounding the points zeroed by the wall boundary condition at the
+    # lower/upper z boundary; `zero` is the tolerance used in the sign tests below.
+    icut_lower_z = scratch_dummy.int_buffer_rs_1
+    icut_upper_z = scratch_dummy.int_buffer_rs_2
+    zero = 1.0e-14
+
+    begin_s_r_z_region()
+    @loop_s_r_z is ir iz begin
+        @views hard_force_moment_constraints!(f_old[:,:,iz,ir,is], moments, vpa)
+    end
+
+    begin_s_r_region()
+    @loop_s_r is ir begin
+        if z.irank == 0
+            iz = 1
+            @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is],
+                                             fvec_in.upar[iz,ir,is],
+                                             moments.evolve_ppar,
+                                             moments.evolve_upar)
+            icut_lower_z[ir,is] = vpa.n
+            for ivpa ∈ vpa.n:-1:1
+                # for left boundary in zed (z = -Lz/2), want
+                # f(z=-Lz/2, v_parallel > 0) = 0
+                if vpa.scratch[ivpa] ≤ zero
+                    icut_lower_z[ir,is] = ivpa + 1
+                    break
+                end
+            end
+        end
+        if z.irank == z.nrank - 1
+            iz = z.n
+            @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is],
+                                             fvec_in.upar[iz,ir,is],
+                                             moments.evolve_ppar,
+                                             moments.evolve_upar)
+            icut_upper_z[ir,is] = 0
+            for ivpa ∈ 1:vpa.n
+                # for right boundary in zed (z = Lz/2), want
+                # f(z=Lz/2, v_parallel < 0) = 0
+                if vpa.scratch[ivpa] ≥ -zero
+                    icut_upper_z[ir,is] = ivpa - 1
+                    break
+                end
+            end
+        end
+    end
+
+    if vpa.n > 1
+        # calculate the vpa advection speed, to ensure it is correct when used to apply the
+        # boundary condition
+        update_speed_vpa!(vpa_advect, fields, fvec_in, moments, vpa, vperp, z, r, composition,
+                          collisions, external_source_settings.ion, t, geometry)
+    end
+    if z.n > 1
+        @loop_s is begin
+            # get the updated speed along the z direction using the current f
+            @views update_speed_z!(z_advect[is], fvec_in.upar[:,:,is],
+                                   moments.ion.vth[:,:,is], moments.evolve_upar,
+                                   moments.evolve_ppar, fields, vpa, vperp, z, r, t,
+                                   geometry, is)
+        end
+    end
+    if r.n > 1
+        @loop_s is begin
+            # get the updated speed along the r direction using the current f
+            @views update_speed_r!(r_advect[is], fvec_in.upar[:,:,is],
+                                   moments.ion.vth[:,:,is], fields, moments.evolve_upar,
+                                   moments.evolve_ppar, vpa, vperp, z, r, geometry, is)
+        end
+    end
+    if vperp.n > 1
+        # calculate the vperp advection speed, to ensure it is correct when used to apply
+        # the boundary condition
+        begin_s_r_z_vpa_region()
+        @loop_s is begin
+            # get the updated speed along the vperp direction using the current f
+            @views update_speed_vperp!(vperp_advect[is], vpa, vperp, z, r, z_advect[is],
+                                       r_advect[is], geometry)
+        end
+    end
+
+    function apply_bc!(x)
+        if vpa.n > 1
+            begin_s_r_z_vperp_region()
+            @loop_s_r_z_vperp is ir iz ivperp begin
+                @views enforce_v_boundary_condition_local!(x[:,ivperp,iz,ir,is], vpa.bc,
+                                                           vpa_advect[is].speed[:,ivperp,iz,ir],
+                                                           advance.vpa_diffusion, vpa,
+                                                           vpa_spectral)
+            end
+        end
+        if vperp.n > 1
+            begin_s_r_z_vpa_region()
+            enforce_vperp_boundary_condition!(x, vperp.bc, vperp, vperp_spectral,
+                                              vperp_advect, advance.vperp_diffusion)
+        end
+
+        if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1)
+            # Wall boundary conditions. Note that as density, upar, ppar do not
+            # change in this implicit step, f_new, f_old, and residual should all
+            # be zero at exactly the same set of grid points, so it is reasonable
+            # to zero-out `residual` to impose the boundary condition. We impose
+            # this after subtracting f_old in case rounding errors, etc. mean that
+            # at some point f_old had a different boundary condition cut-off
+            # index.
+            begin_s_r_vperp_region()
+            if z.irank == 0
+                iz = 1
+                @loop_s_r_vperp is ir ivperp begin
+                    x[icut_lower_z[ir,is]:end,ivperp,iz,ir,is] .= 0.0
+                end
+            end
+            if z.irank == z.nrank - 1
+                iz = z.n
+                @loop_s_r_vperp is ir ivperp begin
+                    x[1:icut_upper_z[ir,is],ivperp,iz,ir,is] .= 0.0
+                end
+            end
+        end
+
+        return nothing
+    end
+
+    # Use a forward-Euler step as the initial guess for fvec_out.pdf
+    euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, vz, vr,
+                        vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects,
+                        composition, collisions, geometry, scratch_dummy,
+                        manufactured_source_list, external_source_settings,
+                        num_diss_params, advance, fp_arrays, istage)
+
+    # Apply the 'new' boundary conditions to f_old, so it has the same boundary conditions
+    # as we will apply to the residual, so that f_new obeys the 'new' boundary conditions.
+    apply_bc!(f_old)
+    # Also apply the bc to the forward-Euler updated values which are the initial state
+    # for 'f_new'.
+    apply_bc!(fvec_out.pdf)
+    hard_force_moment_constraints!(fvec_out.pdf, moments, vpa)
+
+    # Define a function whose input is `f_new`, so that when its output
+    # `residual` is zero, f_new is the result of a backward-Euler timestep:
+    #   (f_new - f_old) / dt = RHS(f_new)
+    # ⇒ f_new - f_old - dt*RHS(f_new) = 0
+    function residual_func!(residual, f_new)
+        begin_s_r_z_vperp_vpa_region()
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            residual[ivpa,ivperp,iz,ir,is] = f_old[ivpa,ivperp,iz,ir,is]
+        end
+
+        # scratch_pdf struct containing the array passed as f_new
+        new_scratch = scratch_pdf(f_new, fvec_out.density, fvec_out.upar, fvec_out.ppar,
+                                  fvec_out.pperp, fvec_out.temp_z_s, fvec_out.pdf_neutral,
+                                  fvec_out.density_neutral, fvec_out.uz_neutral,
+                                  fvec_out.pz_neutral)
+        # scratch_pdf struct containing the array passed as residual
+        residual_scratch = scratch_pdf(residual, fvec_out.density, fvec_out.upar,
+                                       fvec_out.ppar, fvec_out.pperp, fvec_out.temp_z_s,
+                                       fvec_out.pdf_neutral, fvec_out.density_neutral,
+                                       fvec_out.uz_neutral, fvec_out.pz_neutral)
+
+        # Ensure moments are consistent with f_new
+        update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition,
+                                r_spectral, geometry, gyroavs, scratch_dummy, z_advect,
+                                false)
+        calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z,
+                                          z_spectral,
+                                          num_diss_params.ion.moment_dissipation_coefficient)
+
+        euler_time_advance!(residual_scratch, new_scratch, pdf, fields, moments,
+                            advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z,
+                            r, t, dt, spectral_objects, composition, collisions, geometry,
+                            scratch_dummy, manufactured_source_list,
+                            external_source_settings, num_diss_params, advance, fp_arrays,
+                            istage)
+
+        # Make sure updated f will not contain negative values
+        #@. residual = max(residual, minval)
+
+        # Now
+        #   residual = f_old + dt*RHS(f_new)
+        # so update to desired residual
+        begin_s_r_z_vperp_vpa_region()
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            residual[ivpa,ivperp,iz,ir,is] = f_new[ivpa,ivperp,iz,ir,is] - residual[ivpa,ivperp,iz,ir,is]
+        end
+
+        apply_bc!(residual)
+
+        begin_s_r_z_region()
+        @loop_s_r_z is ir iz begin
+            @views moment_constraints_on_residual!(residual[:,:,iz,ir,is],
+                                                   f_new[:,:,iz,ir,is], moments, vpa)
+        end
+
+        return nothing
+    end
+
+    # No preconditioning for now
+    left_preconditioner = identity
+    right_preconditioner = identity
+
+    # Buffers
+    # Note vpa.scratch is used by advance_f!, so we cannot use it here.
+    residual = scratch_dummy.implicit_buffer_vpavperpzrs_2
+    delta_x = scratch_dummy.implicit_buffer_vpavperpzrs_3
+    rhs_delta = scratch_dummy.implicit_buffer_vpavperpzrs_4
+    v = scratch_dummy.implicit_buffer_vpavperpzrs_5
+    w = scratch_dummy.implicit_buffer_vpavperpzrs_6
+
+    success = newton_solve!(fvec_out.pdf, residual_func!, residual, delta_x,
+                            rhs_delta, v, w, nl_solver_params, coords=coords,
+                            left_preconditioner=left_preconditioner,
+                            right_preconditioner=right_preconditioner)
+
+    nl_solver_params.stage_counter[] += 1
+
+    return success
+end
+
 """
 update the vector containing the pdf and any evolved moments of the pdf
 for use in the Runge-Kutta time advance
 """
-function update_solution_vector!(evolved, moments, istage, composition, vpa, vperp, z, r)
-    new_evolved = evolved[istage+1]
-    old_evolved = evolved[istage]
+function update_solution_vector!(new_evolved, old_evolved, moments, composition, vpa, vperp, z, r)
     begin_s_r_z_region()
     @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
         new_evolved.pdf[ivpa,ivperp,iz,ir,is] = old_evolved.pdf[ivpa,ivperp,iz,ir,is]
diff --git a/moment_kinetics/src/velocity_moments.jl b/moment_kinetics/src/velocity_moments.jl
index 1316080aa..5b211e437 100644
--- a/moment_kinetics/src/velocity_moments.jl
+++ b/moment_kinetics/src/velocity_moments.jl
@@ -1440,6 +1440,90 @@ function calculate_neutral_moment_derivatives!(moments, scratch, scratch_dummy,
     end
 end
 
+"""
+update velocity moments that are calculable from the evolved ion pdf
+"""
+function update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition,
+    r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments)
+
+    if composition.gyrokinetic_ions
+        ff = scratch_dummy.buffer_vpavperpzrs_1
+        # fill buffer with ring-averaged F (gyroaverage at fixed position)
+        gyroaverage_pdf!(ff, new_scratch.pdf, gyroavs, vpa, vperp, z, r, composition)
+    else
+        ff = new_scratch.pdf
+    end
+
+    if !moments.evolve_density
+        update_density!(new_scratch.density, moments.ion.dens_updated,
+                        ff, vpa, vperp, z, r, composition)
+    end
+    if !moments.evolve_upar
+        update_upar!(new_scratch.upar, moments.ion.upar_updated, new_scratch.density,
+                     new_scratch.ppar, ff, vpa, vperp, z, r, composition,
+                     moments.evolve_density, moments.evolve_ppar)
+    end
+    if !moments.evolve_ppar
+        # update_ppar! calculates (p_parallel/m_s N_e c_s^2) + (n_s/N_e)*(upar_s/c_s)^2 = (1/√π)∫d(vpa/c_s) (vpa/c_s)^2 * (√π f_s c_s / N_e)
+        update_ppar!(new_scratch.ppar, moments.ion.ppar_updated, new_scratch.density,
+                     new_scratch.upar, ff, vpa, vperp, z, r, composition,
+                     moments.evolve_density, moments.evolve_upar)
+    end
+    update_pperp!(new_scratch.pperp, ff, vpa, vperp, z, r, composition)
+
+    # the diagnostic chodura condition is only updated when the caller flags this as
+    # a diagnostic time step/RK stage
+    if diagnostic_moments
+        update_chodura!(moments, ff, vpa, vperp, z, r, r_spectral, composition, geometry, scratch_dummy, z_advect)
+    end
+    # update the thermal speed
+    begin_s_r_z_region()
+    try # update_vth! may throw a DomainError (e.g. ppar or density < 0); abort cleanly
+        update_vth!(moments.ion.vth, new_scratch.ppar, new_scratch.pperp, new_scratch.density, vperp, z, r, composition)
+    catch e
+        if global_size[] > 1
+            println("ERROR: error calculating vth in velocity_moments.jl")
+            println(e)
+            display(stacktrace(catch_backtrace()))
+            flush(stdout)
+            flush(stderr)
+            MPI.Abort(comm_world, 1)
+        end
+        rethrow()
+    end
+    # update the parallel heat flux
+    update_qpar!(moments.ion.qpar, moments.ion.qpar_updated, new_scratch.density,
+                 new_scratch.upar, moments.ion.vth, ff, vpa, vperp, z, r,
+                 composition, moments.evolve_density, moments.evolve_upar,
+                 moments.evolve_ppar)
+    # add further moments to be computed here
+
+end
+
+"""
+update velocity moments that are calculable from the evolved neutral pdf
+"""
+function update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r,
+                                         composition)
+    f_neutral, neutral = new_scratch.pdf_neutral, moments.neutral
+    dens, uz = new_scratch.density_neutral, new_scratch.uz_neutral
+    pz = new_scratch.pz_neutral
+
+    # Only recompute the moments that are not evolved as separate variables
+    if !moments.evolve_density
+        update_neutral_density!(dens, neutral.dens_updated, f_neutral, vz, vr, vzeta, z,
+                                r, composition)
+    end
+    if !moments.evolve_upar
+        update_neutral_uz!(uz, neutral.uz_updated, dens, pz, f_neutral, vz, vr, vzeta, z,
+                           r, composition, moments.evolve_density, moments.evolve_ppar)
+    end
+    if !moments.evolve_ppar
+        update_neutral_pz!(pz, neutral.pz_updated, dens, uz, f_neutral, vz, vr, vzeta, z,
+                           r, composition, moments.evolve_density, moments.evolve_upar)
+    end
+end
+
 """
 computes the integral over vpa of the integrand, using the input vpa_wgts
 """
diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl
index 9d4881b0b..bd70b3503 100644
--- a/moment_kinetics/src/vpa_advection.jl
+++ b/moment_kinetics/src/vpa_advection.jl
@@ -6,8 +6,20 @@ export vpa_advection!
 export update_speed_vpa!
 
 using ..advection: advance_f_local!
+using ..boundary_conditions: enforce_v_boundary_condition_local!
 using ..communication
 using ..looping
+using ..moment_constraints: hard_force_moment_constraints!,
+                            moment_constraints_on_residual!
+using ..moment_kinetics_structs: scratch_pdf, weak_discretization_info
+using ..nonlinear_solvers: newton_solve!
+using ..velocity_moments: update_derived_moments!, calculate_ion_moment_derivatives!
+
+using ..array_allocation: allocate_float
+using ..boundary_conditions: vpagrid_to_dzdt
+using ..calculus: second_derivative!
+using LinearAlgebra
+using SparseArrays
 
 """
 """
@@ -30,6 +42,278 @@ function vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, vperp, z,
     end
 end
 
+"""Backward-Euler update of vpa advection (plus vpa diffusion if `vpa_diffusion`), solved
+with `newton_solve!` on each vpa slice of `f_out`; returns `false` if a solve fails."""
+function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_advect,
+                                 vpa, vperp, z, r, dt, t, r_spectral, z_spectral,
+                                 vpa_spectral, composition, collisions,
+                                 ion_source_settings, geometry, nl_solver_params,
+                                 vpa_diffusion, num_diss_params, gyroavs, scratch_dummy)
+    if vperp.n > 1 && (moments.evolve_density || moments.evolve_upar || moments.evolve_ppar)
+        error("Moment constraints in implicit_vpa_advection!() do not support 2V runs yet")
+    end
+
+    # calculate the advection speed corresponding to current f
+    update_speed_vpa!(vpa_advect, fields, fvec_in, moments, vpa, vperp, z, r, composition,
+                      collisions, ion_source_settings, t, geometry)
+
+    # Ensure moments are consistent with f_new
+    new_scratch = scratch_pdf(f_out, fvec_in.density, fvec_in.upar, fvec_in.ppar,
+                              fvec_in.pperp, fvec_in.temp_z_s, fvec_in.pdf_neutral,
+                              fvec_in.density_neutral, fvec_in.uz_neutral,
+                              fvec_in.pz_neutral)
+    update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition,
+                            r_spectral, geometry, gyroavs, scratch_dummy, z_advect, false)
+    calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z,
+                                      z_spectral,
+                                      num_diss_params.ion.moment_dissipation_coefficient)
+
+    begin_s_r_z_vperp_region()
+
+    coords = (vpa=vpa,)
+    vpa_bc = vpa.bc
+    minval = num_diss_params.ion.force_minimum_pdf_value
+    vpa_dissipation_coefficient = num_diss_params.ion.vpa_dissipation_coefficient
+    zero = 1.0e-14 # tolerance used when comparing vpa.scratch against zero below
+    @loop_s is begin
+        @loop_r_z_vperp ir iz ivperp begin
+            f_old_no_bc = @view fvec_in.pdf[:,ivperp,iz,ir,is]
+            this_f_out = @view f_out[:,ivperp,iz,ir,is]
+            speed = @view vpa_advect[is].speed[:,ivperp,iz,ir]
+
+            if z.irank == 0 && iz == 1
+                @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is],
+                                                 fvec_in.upar[iz,ir,is],
+                                                 moments.evolve_ppar,
+                                                 moments.evolve_upar)
+                icut_lower_z = vpa.n
+                for ivpa ∈ vpa.n:-1:1
+                    # for left boundary in zed (z = -Lz/2), want
+                    # f(z=-Lz/2, v_parallel > 0) = 0
+                    if vpa.scratch[ivpa] ≤ zero
+                        icut_lower_z = ivpa + 1
+                        break
+                    end
+                end
+            end
+            if z.irank == z.nrank - 1 && iz == z.n
+                @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is],
+                                                 fvec_in.upar[iz,ir,is],
+                                                 moments.evolve_ppar,
+                                                 moments.evolve_upar)
+                icut_upper_z = 0
+                for ivpa ∈ 1:vpa.n
+                    # for right boundary in zed (z = Lz/2), want
+                    # f(z=Lz/2, v_parallel < 0) = 0
+                    if vpa.scratch[ivpa] ≥ -zero
+                        icut_upper_z = ivpa - 1
+                        break
+                    end
+                end
+            end
+
+            function apply_bc!(x)
+                # Boundary condition
+                enforce_v_boundary_condition_local!(x, vpa_bc, speed, vpa_diffusion,
+                                                    vpa, vpa_spectral)
+
+                if z.bc == "wall"
+                    # Wall boundary conditions. Note that as density, upar, ppar do not
+                    # change in this implicit step, f_new, f_old, and residual should all
+                    # be zero at exactly the same set of grid points, so it is reasonable
+                    # to zero-out `residual` to impose the boundary condition. We impose
+                    # this after subtracting f_old in case rounding errors, etc. mean that
+                    # at some point f_old had a different boundary condition cut-off
+                    # index.
+                    if z.irank == 0 && iz == 1
+                        x[icut_lower_z:end] .= 0.0
+                    end
+                    # absolute velocity at right boundary
+                    if z.irank == z.nrank - 1 && iz == z.n
+                        x[1:icut_upper_z] .= 0.0
+                    end
+                end
+            end
+
+            # Need to apply 'new' boundary conditions to `f_old`, so that by imposing them
+            # on `residual`, they are automatically imposed on `f_new`.
+            f_old = vpa.scratch7 .= f_old_no_bc
+            apply_bc!(f_old)
+
+            #if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0
+            #    advection_matrix = allocate_float(vpa.n, vpa.n)
+            #    advection_matrix .= 0.0
+            #    for i ∈ 1:vpa.nelement_local
+            #        imin = vpa.imin[i] - (i != 1)
+            #        imax = vpa.imax[i]
+            #        if i == 1
+            #            advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i]
+            #        else
+            #            if speed[imin] < 0.0
+            #                advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i]
+            #            elseif speed[imin] > 0.0
+            #                # Do nothing
+            #            else
+            #                advection_matrix[imin,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i]
+            #            end
+            #        end
+            #        advection_matrix[imin+1:imax-1,imin:imax] .+= vpa_spectral.lobatto.Dmat[2:end-1,:] ./ vpa.element_scale[i]
+            #        if i == vpa.nelement_local
+            #            advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i]
+            #        else
+            #            if speed[imax] < 0.0
+            #                # Do nothing
+            #            elseif speed[imax] > 0.0
+            #                advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i]
+            #            else
+            #                advection_matrix[imax,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i]
+            #            end
+            #        end
+            #    end
+            #    # Multiply by advection speed
+            #    for i ∈ 1:vpa.n
+            #        advection_matrix[i,:] .*= dt * speed[i]
+            #    end
+            #    for i ∈ 1:vpa.n
+            #        advection_matrix[i,i] += 1.0
+            #    end
+
+            #    if isa(vpa_spectral, weak_discretization_info)
+            #        # This allocates a new matrix - to avoid this would need to pre-allocate a
+            #        # suitable buffer somewhere and use `mul!()`.
+            #        advection_matrix = vpa_spectral.mass_matrix * advection_matrix
+            #        @. advection_matrix -= dt * vpa_dissipation_coefficient * vpa_spectral.K_matrix
+            #    elseif vpa_dissipation_coefficient > 0.0
+            #        error("Non-weak-form schemes cannot precondition diffusion")
+            #    end
+
+            #    # hacky (?) Dirichlet boundary conditions
+            #    this_f_out[1] = 0.0
+            #    this_f_out[end] = 0.0
+            #    advection_matrix[1,:] .= 0.0
+            #    advection_matrix[1,1] = 1.0
+            #    advection_matrix[end,:] .= 0.0
+            #    advection_matrix[end,end] = 1.0
+
+            #    if z.bc == "wall"
+            #        if z.irank == 0 && iz == 1
+            #            # Set equal df/dt equal to f on points that should be set to zero for
+            #            # boundary condition. The vector that the inverse of the advection matrix
+            #            # acts on should have zeros there already.
+            #            advection_matrix[icut_lower_z:end,icut_lower_z:end] .= 0.0
+            #            for i ∈ icut_lower_z:vpa.n
+            #                advection_matrix[i,i] = 1.0
+            #            end
+            #        end
+            #        if z.irank == z.nrank - 1 && iz == z.n
+            #            # Set equal df/dt equal to f on points that should be set to zero for
+            #            # boundary condition. The vector that the inverse of the advection matrix
+            #            # acts on should have zeros there already.
+            #            # I comes from LinearAlgebra and represents identity matrix
+            #            advection_matrix[1:icut_upper_z,1:icut_upper_z] .= 0.0
+            #            for i ∈ 1:icut_upper_z
+            #                advection_matrix[i,i] = 1.0
+            #            end
+            #        end
+            #    end
+
+            #    advection_matrix = sparse(advection_matrix)
+            #    nl_solver_params.preconditioners[ivperp,iz,ir,is] = lu(advection_matrix)
+            #end
+
+            #function preconditioner(x)
+            #    if isa(vpa_spectral, weak_discretization_info)
+            #        # Multiply by mass matrix, storing result in vpa.scratch
+            #        mul!(vpa.scratch, vpa_spectral.mass_matrix, x)
+            #    end
+
+            #    # Handle boundary conditions
+            #    enforce_v_boundary_condition_local!(vpa.scratch, vpa_bc, speed, vpa_diffusion,
+            #                                        vpa, vpa_spectral)
+
+            #    if z.bc == "wall"
+            #        # Wall boundary conditions. Note that as density, upar, ppar do not
+            #        # change in this implicit step, f_new, f_old, and residual should all
+            #        # be zero at exactly the same set of grid points, so it is reasonable
+            #        # to zero-out `residual` to impose the boundary condition. We impose
+            #        # this after subtracting f_old in case rounding errors, etc. mean that
+            #        # at some point f_old had a different boundary condition cut-off
+            #        # index.
+            #        if z.irank == 0 && iz == 1
+            #            vpa.scratch[icut_lower_z:end] .= 0.0
+#           #             println("at icut_lower_z ", f_new[icut_lower_z], " ", f_old[icut_lower_z])
+            #        end
+            #        # absolute velocity at right boundary
+            #        if z.irank == z.nrank - 1 && iz == z.n
+            #            vpa.scratch[1:icut_upper_z] .= 0.0
+            #        end
+            #    end
+
+            #    # Do LU application on vpa.scratch, storing result in x
+            #    ldiv!(x, nl_solver_params.preconditioners[ivperp,iz,ir,is], vpa.scratch)
+            #    return nothing
+            #end
+            left_preconditioner = identity
+            right_preconditioner = identity
+            #right_preconditioner = preconditioner
+
+            # Define a function whose input is `f_new`, so that when its output
+            # `residual` is zero, f_new is the result of a backward-Euler timestep:
+            #   (f_new - f_old) / dt = RHS(f_new)
+            # ⇒ f_new - f_old - dt*RHS(f_new) = 0
+            function residual_func!(residual, f_new)
+                apply_bc!(f_new)
+                residual .= f_old
+                advance_f_local!(residual, f_new, vpa_advect[is], ivperp, iz, ir, vpa, dt,
+                                 vpa_spectral)
+
+                if vpa_diffusion
+                    second_derivative!(vpa.scratch, f_new, vpa, vpa_spectral)
+                    @. residual += dt * vpa_dissipation_coefficient * vpa.scratch
+                end
+
+                # Make sure updated f will not contain negative values
+                #@. residual = max(residual, minval)
+
+                # Now
+                #   residual = f_old + dt*RHS(f_new)
+                # so update to desired residual
+                @. residual = f_new - residual
+
+                apply_bc!(residual)
+            end
+
+            # Buffers
+            # Note vpa.scratch is used by advance_f!, so we cannot use it here.
+            residual = vpa.scratch2
+            delta_x = vpa.scratch3
+            rhs_delta = vpa.scratch4
+            v = vpa.scratch5
+            w = vpa.scratch6
+
+            # Use forward-Euler step for initial guess
+            # By passing this_f_out, which is equal to f_old at this point, the 'residual'
+            # is
+            #   f_new - f_old - dt*RHS(f_old) = -dt*RHS(f_old)
+            # so to get a forward-Euler step we have to subtract this 'residual'
+            residual_func!(residual, this_f_out)
+            this_f_out .-= residual
+
+            success = newton_solve!(this_f_out, residual_func!, residual, delta_x,
+                                    rhs_delta, v, w, nl_solver_params, coords=coords,
+                                    left_preconditioner=left_preconditioner,
+                                    right_preconditioner=right_preconditioner)
+            if !success
+                return success
+            end
+        end
+    end
+
+    nl_solver_params.stage_counter[] += 1
+
+    return true
+end
+
 """
 calculate the advection speed in the vpa-direction at each grid point
 """
diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl
new file mode 100644
index 000000000..5c0b08dfe
--- /dev/null
+++ b/moment_kinetics/test/nonlinear_solver_tests.jl
@@ -0,0 +1,289 @@
+module NonlinearSolverTests
+
+include("setup.jl")
+
+using moment_kinetics.array_allocation: allocate_float, allocate_shared_float
+using moment_kinetics.communication
+using moment_kinetics.coordinates: coordinate
+using moment_kinetics.input_structs: advection_input
+using moment_kinetics.looping
+using moment_kinetics.looping: setup_loop_ranges!
+using moment_kinetics.nonlinear_solvers
+using moment_kinetics.type_definitions: mk_float, mk_int
+
+using MPI
+
+function linear_test()
+    println("    - linear test")
+    @testset "linear test $coord_names" for (coord_names, serial_solve) ∈ (((:z,), false), ((:vpa,), true))
+        # Test represents constant-coefficient diffusion, in 1D steady state, with a
+        # central finite-difference discretisation of the second derivative.
+        #
+        # Note, need to use newton_solve!() here even though it is a linear problem,
+        # because the inexact Jacobian-vector product we use in linear_solve!() means
+        # linear_solve!() on its own does not converge to the correct answer.
+
+        n = 16
+        restart = 8
+        max_restarts = 1
+        atol = 1.0e-10
+
+        irank_z, nrank_z, comm_sub_z, irank_r, nrank_r, comm_sub_r =
+            setup_distributed_memory_MPI(1, 1, 1, 1)
+
+        setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=0, r=1, z=n, vperp=1, vpa=1,
+                           vzeta=1, vr=1, vz=1)
+
+        A = zeros(n,n)
+        i = 1
+        A[i,i] = -2.0
+        A[i,i+1] = 1.0
+        for i ∈ 2:n-1
+            A[i,i-1] = 1.0
+            A[i,i] = -2.0
+            A[i,i+1] = 1.0
+        end
+        i = n
+        A[i,i-1] = 1.0
+        A[i,i] = -2.0
+
+        z = collect(0:n-1) ./ (n-1)
+        b = @. - z * (1.0 - z)
+
+        the_coord = coordinate("foo", n, n, n, 1, 1, 1, 0, 1.0, zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0),
+                               zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_int, 0, 0),
+                               "", "", "", "", zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0, 0), zeros(mk_float, 0, 0),
+                               advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n,
+                               zeros(mk_float, 0), zeros(mk_float, 0), "",
+                               zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0),
+                               zeros(mk_float, 0, 0))
+        coords = NamedTuple(c => the_coord for c ∈ coord_names)
+
+        function rhs_func!(residual, x)
+            if serial_solve
+                residual .= A * x - b
+            else
+                begin_serial_region()
+                @serial_region begin
+                    residual .= A * x - b
+                end
+            end
+            return nothing
+        end
+
+        if serial_solve
+            x = allocate_float(n)
+            residual = allocate_float(n)
+            delta_x = allocate_float(n)
+            rhs_delta = allocate_float(n)
+            v = allocate_float(n)
+            w = allocate_float(n)
+
+            x .= 0.0
+            residual .= 0.0
+            delta_x .= 0.0
+            rhs_delta .= 0.0
+            v .= 0.0
+            w .= 0.0
+        else
+            x = allocate_shared_float(n)
+            residual = allocate_shared_float(n)
+            delta_x = allocate_shared_float(n)
+            rhs_delta = allocate_shared_float(n)
+            v = allocate_shared_float(n)
+            w = allocate_shared_float(n)
+
+            begin_serial_region()
+            @serial_region begin
+                x .= 0.0
+                residual .= 0.0
+                delta_x .= 0.0
+                rhs_delta .= 0.0
+                v .= 0.0
+                w .= 0.0
+            end
+        end
+
+        nl_solver_params = setup_nonlinear_solve(
+            Dict{String,Any}("nonlinear_solver" =>
+                             Dict{String,Any}("rtol" => 0.0,
+                                              "atol" => atol,
+                                              "linear_restart" => restart,
+                                              "linear_max_restarts" => max_restarts)),
+            coords; serial_solve=serial_solve)
+
+        newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params;
+                      coords)
+
+        if serial_solve
+            x_direct = A \ b
+
+            @test isapprox(x, x_direct; atol=100.0*atol)
+        else
+            begin_serial_region()
+            @serial_region begin
+                x_direct = A \ b
+
+                @test isapprox(x, x_direct; atol=100.0*atol)
+            end
+        end
+    end
+end
+
+function nonlinear_test()
+    println("    - non-linear test")
+    @testset "non-linear test" for (coord_names, serial_solve) ∈ (((:z,), false), ((:vpa,), true))
+        # Test represents diffusion with a solution-dependent coefficient D = |x|^2.5, in
+        # 1D steady state, with a central finite-difference discretisation of the second
+        # derivative.
+        #
+        # Unlike the linear test, the problem is genuinely non-linear in x, so it is
+        # checked by evaluating the residual at the solution returned by newton_solve!().
+
+        n = 16
+        restart = 10
+        max_restarts = 0
+        atol = 1.0e-10
+
+        irank_z, nrank_z, comm_sub_z, irank_r, nrank_r, comm_sub_r =
+            setup_distributed_memory_MPI(1, 1, 1, 1)
+
+        setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=0, r=1, z=n, vperp=1, vpa=1,
+                           vzeta=1, vr=1, vz=1)
+
+        z = collect(0:n-1) ./ (n-1)
+        b = @. - z * (1.0 - z)
+
+        the_coord = coordinate("foo", n, n, n, 1, 1, 1, 0, 1.0, zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0),
+                               zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_int, 0, 0),
+                               "", "", "", "", zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0, 0), zeros(mk_float, 0, 0),
+                               advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n,
+                               zeros(mk_float, 0), zeros(mk_float, 0), "",
+                               zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0),
+                               zeros(mk_float, 0, 0))
+        coords = NamedTuple(c => the_coord for c ∈ coord_names)
+
+        function rhs_func!(residual, x)
+            if serial_solve
+                i = 1
+                D = abs(x[i])^2.5
+                residual[i] = D * (- 2.0 * x[i] + x[i+1]) - b[i]
+                for i ∈ 2:n-1
+                    D = abs(x[i])^2.5
+                    residual[i] = D * (x[i-1] - 2.0 * x[i] + x[i+1]) - b[i]
+                end
+                i = n
+                D = abs(x[i])^2.5
+                residual[i] = D * (x[i-1] - 2.0 * x[i]) - b[i]
+            else
+                begin_serial_region()
+                @serial_region begin
+                    i = 1
+                    D = abs(x[i])^2.5
+                    residual[i] = D * (- 2.0 * x[i] + x[i+1]) - b[i]
+                    for i ∈ 2:n-1
+                        D = abs(x[i])^2.5
+                        residual[i] = D * (x[i-1] - 2.0 * x[i] + x[i+1]) - b[i]
+                    end
+                    i = n
+                    D = abs(x[i])^2.5
+                    residual[i] = D * (x[i-1] - 2.0 * x[i]) - b[i]
+                end
+            end
+            return nothing
+        end
+
+        if serial_solve
+            x = allocate_float(n)
+            residual = allocate_float(n)
+            delta_x = allocate_float(n)
+            rhs_delta = allocate_float(n)
+            v = allocate_float(n)
+            w = allocate_float(n)
+        else
+            x = allocate_shared_float(n)
+            residual = allocate_shared_float(n)
+            delta_x = allocate_shared_float(n)
+            rhs_delta = allocate_shared_float(n)
+            v = allocate_shared_float(n)
+            w = allocate_shared_float(n)
+        end
+
+        if serial_solve
+            x .= 1.0
+            residual .= 0.0
+            delta_x .= 0.0
+            rhs_delta .= 0.0
+            v .= 0.0
+            w .= 0.0
+        else
+            begin_serial_region()
+            @serial_region begin
+                x .= 1.0
+                residual .= 0.0
+                delta_x .= 0.0
+                rhs_delta .= 0.0
+                v .= 0.0
+                w .= 0.0
+            end
+        end
+
+        nl_solver_params = setup_nonlinear_solve(
+            Dict{String,Any}("nonlinear_solver" =>
+                             Dict{String,Any}("rtol" => 0.0,
+                                              "atol" => atol,
+                                              "linear_restart" => restart,
+                                              "linear_max_restarts" => max_restarts,
+                                              "nonlinear_max_iterations" => 100)),
+            coords; serial_solve=serial_solve)
+
+        newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params;
+                      coords)
+
+        rhs_func!(residual, x)
+
+        if serial_solve
+            @test isapprox(residual, zeros(n); atol=4.0*atol)
+        else
+            begin_serial_region()
+            @serial_region begin
+                @test isapprox(residual, zeros(n); atol=4.0*atol)
+            end
+        end
+    end
+end
+
+function runtests()
+    if Sys.isapple()
+        @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin
+            println("non-linear solver tests")
+            linear_test()
+            nonlinear_test()
+        end
+    else
+        @testset "non-linear solvers" begin
+            println("non-linear solver tests")
+            linear_test()
+            nonlinear_test()
+        end
+    end
+end
+
+end # NonlinearSolverTests
+
+using .NonlinearSolverTests
+NonlinearSolverTests.runtests()
diff --git a/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl b/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl
index cdee05ac9..d76a644f9 100644
--- a/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl
+++ b/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl
@@ -163,6 +163,11 @@ test_input_chebyshev = merge(test_input_finite_difference,
                                   "vz_ngrid" => 17,
                                   "vz_nelement" => 8))
 
+if global_size[] > 2 && global_size[] % 2 == 0
+    # Test using distributed-memory
+    test_input_chebyshev["z_nelement_local"] = test_input_chebyshev["z_nelement"] ÷ 2
+end
+
 test_input_chebyshev_split_1_moment =
     merge(test_input_chebyshev,
           Dict("run_name" => "chebyshev_pseudospectral_split_1_moment",
diff --git a/moment_kinetics/test/nonlinear_sound_wave_tests.jl b/moment_kinetics/test/nonlinear_sound_wave_tests.jl
index 0bb965494..590047000 100644
--- a/moment_kinetics/test/nonlinear_sound_wave_tests.jl
+++ b/moment_kinetics/test/nonlinear_sound_wave_tests.jl
@@ -6,12 +6,9 @@ using Base.Filesystem: tempname
 
 using moment_kinetics.coordinates: define_coordinate
 using moment_kinetics.input_structs: grid_input, advection_input
-using moment_kinetics.load_data: open_readonly_output_file, load_coordinate_data,
-                                 load_species_data, load_fields_data,
-                                 load_ion_moments_data, load_pdf_data,
-                                 load_neutral_particle_moments_data,
-                                 load_neutral_pdf_data, load_time_data, load_species_data
 using moment_kinetics.interpolation: interpolate_to_grid_z, interpolate_to_grid_vpa
+using moment_kinetics.load_data: get_run_info_no_setup, close_run_info,
+                                 postproc_load_variable
 using moment_kinetics.type_definitions: mk_float
 
 const analytical_rtol = 3.e-2
@@ -79,35 +76,43 @@ function run_test(test_input, rtol, atol, upar_rtol=nothing; args...)
             # Load and analyse output
             #########################
 
-            path = joinpath(realpath(input["base_directory"]), name, name)
+            path = joinpath(realpath(input["base_directory"]), name)
 
-            # open the netcdf file containing moments data and give it the handle 'fid'
-            fid = open_readonly_output_file(path, "moments")
+            # open the output file(s)
+            run_info = get_run_info_no_setup(path; dfns=true)
 
             # load species, time coordinate data
-            n_ion_species, n_neutral_species = load_species_data(fid)
-            ntime, time = load_time_data(fid)
-            n_ion_species, n_neutral_species = load_species_data(fid)
+            n_ion_species = run_info.composition.n_ion_species
+            n_neutral_species = run_info.composition.n_neutral_species
+            ntime = run_info.nt
+            time = run_info.time
             
             # load fields data
-            phi_zrt, Er_zrt, Ez_zrt = load_fields_data(fid)
+            phi_zrt = postproc_load_variable(run_info, "phi")
+            Er_zrt = postproc_load_variable(run_info, "Er")
+            Ez_zrt = postproc_load_variable(run_info, "Ez")
 
             # load velocity moments data
-            n_ion_zrst, upar_ion_zrst, ppar_ion_zrst, qpar_ion_zrst, v_t_ion_zrst = load_ion_moments_data(fid)
-            n_neutral_zrst, upar_neutral_zrst, ppar_neutral_zrst, qpar_neutral_zrst, v_t_neutral_zrst = load_neutral_particle_moments_data(fid)
-            z, z_spectral = load_coordinate_data(fid, "z")
+            n_ion_zrst = postproc_load_variable(run_info, "density")
+            upar_ion_zrst = postproc_load_variable(run_info, "parallel_flow")
+            ppar_ion_zrst = postproc_load_variable(run_info, "parallel_pressure")
+            qpar_ion_zrst = postproc_load_variable(run_info, "parallel_heat_flux")
+            v_t_ion_zrst = postproc_load_variable(run_info, "thermal_speed")
+            n_neutral_zrst = postproc_load_variable(run_info, "density_neutral")
+            upar_neutral_zrst = postproc_load_variable(run_info, "uz_neutral")
+            ppar_neutral_zrst = postproc_load_variable(run_info, "pz_neutral")
+            qpar_neutral_zrst = postproc_load_variable(run_info, "qz_neutral")
+            v_t_neutral_zrst = postproc_load_variable(run_info, "thermal_speed_neutral")
+            z = run_info.z
+            z_spectral = run_info.z_spectral
 
-            close(fid)
-            
-            # open the netcdf file containing pdf data
-            fid = open_readonly_output_file(path, "dfns")
-            
             # load particle distribution function (pdf) data
-            f_ion_vpavperpzrst = load_pdf_data(fid)
-            f_neutral_vzvrvzetazrst = load_neutral_pdf_data(fid)
-            vpa, vpa_spectral = load_coordinate_data(fid, "vpa")
+            f_ion_vpavperpzrst = postproc_load_variable(run_info, "f")
+            f_neutral_vzvrvzetazrst = postproc_load_variable(run_info, "f_neutral")
+            vpa = run_info.vpa
+            vpa_spectral = run_info.vpa_spectral
 
-            close(fid)
+            close_run_info(run_info)
             
             phi = phi_zrt[:,1,:]
             n_ion = n_ion_zrst[:,1,:,:]
diff --git a/moment_kinetics/test/recycling_fraction_tests.jl b/moment_kinetics/test/recycling_fraction_tests.jl
index 935ed968e..28a22c84f 100644
--- a/moment_kinetics/test/recycling_fraction_tests.jl
+++ b/moment_kinetics/test/recycling_fraction_tests.jl
@@ -12,10 +12,8 @@ using MPI
 using moment_kinetics.coordinates: define_coordinate
 using moment_kinetics.input_structs: grid_input, advection_input
 using moment_kinetics.interpolation: interpolate_to_grid_z
-using moment_kinetics.load_data: open_readonly_output_file
-using moment_kinetics.load_data: load_fields_data,
-                                 load_pdf_data, load_time_data,
-                                 load_species_data
+using moment_kinetics.load_data: get_run_info_no_setup, close_run_info,
+                                 postproc_load_variable
 
 # default inputs for tests
 test_input = Dict("n_ion_species" => 1,
@@ -91,6 +89,10 @@ test_input = Dict("n_ion_species" => 1,
                                        "source_strength" => 2.0,
                                        "source_T" => 2.0))
 
+if global_size[] > 2 && global_size[] % 2 == 0
+    # Test using distributed-memory
+    test_input["z_nelement_local"] = test_input["z_nelement"] ÷ 2
+end
 
 test_input_split1 = merge(test_input,
                           Dict("run_name" => "split1",
@@ -203,20 +205,15 @@ function run_test(test_input, expected_phi; rtol=4.e-14, atol=1.e-15, args...)
             # Load and analyse output
             #########################
 
-            path = joinpath(realpath(input["base_directory"]), name, name)
+            path = joinpath(realpath(input["base_directory"]), name)
 
-            # open the netcdf file and give it the handle 'fid'
-            fid = open_readonly_output_file(path,"moments")
+            # open the output file(s)
+            run_info = get_run_info_no_setup(path)
 
-            # load species, time coordinate data
-            n_ion_species, n_neutral_species = load_species_data(fid)
-            ntime, time = load_time_data(fid)
-            n_ion_species, n_neutral_species = load_species_data(fid)
-            
             # load fields data
-            phi_zrt, Er_zrt, Ez_zrt = load_fields_data(fid)
+            phi_zrt = postproc_load_variable(run_info, "phi")
 
-            close(fid)
+            close_run_info(run_info)
             
             phi = phi_zrt[:,1,:]
         end
diff --git a/moment_kinetics/test/runtests.jl b/moment_kinetics/test/runtests.jl
index 1b78fca36..fa0f5d64f 100644
--- a/moment_kinetics/test/runtests.jl
+++ b/moment_kinetics/test/runtests.jl
@@ -7,6 +7,7 @@ function runtests()
         include(joinpath(@__DIR__, "calculus_tests.jl"))
         include(joinpath(@__DIR__, "interpolation_tests.jl"))
         include(joinpath(@__DIR__, "loop_setup_tests.jl"))
+        include(joinpath(@__DIR__, "nonlinear_solver_tests.jl"))
         include(joinpath(@__DIR__, "velocity_integral_tests.jl"))
         include(joinpath(@__DIR__, "sound_wave_tests.jl"))
         include(joinpath(@__DIR__, "nonlinear_sound_wave_tests.jl"))
diff --git a/moment_kinetics/test/setup.jl b/moment_kinetics/test/setup.jl
index 3bb9896ef..555824d00 100644
--- a/moment_kinetics/test/setup.jl
+++ b/moment_kinetics/test/setup.jl
@@ -13,9 +13,9 @@ using moment_kinetics
 module MKTestUtilities
 
 export use_verbose, force_optional_dependencies, @long, quietoutput, get_MPI_tempdir,
-       global_rank, maxabs_norm, @testset_skip
+       global_rank, global_size, maxabs_norm, @testset_skip
 
-using moment_kinetics.communication: comm_world, global_rank
+using moment_kinetics.communication: comm_world, global_rank, global_size
 using moment_kinetics.command_line_options: get_options
 
 using MPI
diff --git a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl
index 952444ea1..f4f3c366d 100644
--- a/util/calculate_rk_coeffs.jl
+++ b/util/calculate_rk_coeffs.jl
@@ -6,6 +6,115 @@ into ones that we can use.
 
 using Symbolics
 
+# The following functions are copied and modified from Symbolics.jl's linear_algebra.jl so
+# that we can hack them to force them to return a Rational{BigInt} result.
+# Modifications:
+#  * Add prefix `my_` to the function names, to avoid confusion/conflicts
+#  * Change `Num.()` to `Rational{BigInt}.()` in `_my_solve` so that `A` and `b` are
+#    arrays of `Rational{BigInt}` (so that we avoid any rounding errors). For the case
+#    that we want, the entries of `A` and `b` are all numerical values (not actual
+#    symbolic expressions), so this hack can be done.
+#  * Change `/` to `//` in `my_sym_lu2()`
+using Symbolics: linear_expansion, SymbolicUtils, value, sym_lu, Num, RCNum, _iszero, nterms
+using LinearAlgebra
+function my_solve_for(eq, var; simplify=false, check=true) # scalar case
+    # simplify defaults to `false` as canonicalization should handle most of
+    # the cases.
+    a, b, islinear = linear_expansion(eq, var)
+    check && @assert islinear
+    islinear || return nothing
+    # a * x + b = 0
+    if eq isa AbstractArray && var isa AbstractArray
+        x = _my_solve(a, -b, simplify)
+    else
+        x = a \ -b
+    end
+    simplify || return x
+    if x isa AbstractArray
+        SymbolicUtils.simplify.(simplify_fractions.(x))
+    else
+        SymbolicUtils.simplify(simplify_fractions(x))
+    end
+end
+
+function _my_solve(A::AbstractMatrix, b::AbstractArray, do_simplify)
+    #A = Num.(value.(SymbolicUtils.quick_cancel.(A)))
+    #b = Num.(value.(SymbolicUtils.quick_cancel.(b)))
+    A = Rational{BigInt}.(value.(SymbolicUtils.quick_cancel.(A)))
+    b = Rational{BigInt}.(value.(SymbolicUtils.quick_cancel.(b)))
+    sol = value.(sym_lu(A) \ b)
+    do_simplify ? SymbolicUtils.simplify_fractions.(sol) : sol
+end
+
+function my_solve_for2(eq, var; simplify=false, check=true) # scalar case
+    # simplify defaults to `false` as canonicalization should handle most of
+    # the cases.
+    a, b, islinear = linear_expansion(eq, var)
+    check && @assert islinear
+    islinear || return nothing
+    # a * x + b = 0
+    if eq isa AbstractArray && var isa AbstractArray
+        x = _my_solve2(a, -b, simplify)
+    else
+        x = a \ -b
+    end
+    simplify || return x
+    if x isa AbstractArray
+        SymbolicUtils.simplify.(simplify_fractions.(x))
+    else
+        SymbolicUtils.simplify(simplify_fractions(x))
+    end
+end
+
+function _my_solve2(A::AbstractMatrix, b::AbstractArray, do_simplify)
+    A = Num.(value.(SymbolicUtils.quick_cancel.(A)))
+    b = Num.(value.(SymbolicUtils.quick_cancel.(b)))
+    sol = value.(my_sym_lu2(A) \ b)
+    do_simplify ? SymbolicUtils.simplify_fractions.(sol) : sol
+end
+
+function my_sym_lu2(A; check=true)
+    SINGULAR = typemax(Int)
+    m, n = size(A)
+    F = map(x->x isa RCNum ? x : Num(x), A)
+    minmn = min(m, n)
+    p = Vector{LinearAlgebra.BlasInt}(undef, minmn)
+    info = 0
+    for k = 1:minmn
+        kp = k
+        amin = SINGULAR
+        for i in k:m
+            absi = _iszero(F[i, k]) ? SINGULAR : nterms(F[i,k])
+            if absi < amin
+                kp = i
+                amin = absi
+            end
+        end
+
+        p[k] = kp
+
+        if amin == SINGULAR && !(amin isa Symbolic) && (amin isa Number) && iszero(info)
+            info = k
+        end
+
+        # swap
+        for j in 1:n
+            F[k, j], F[kp, j] = F[kp, j], F[k, j]
+        end
+
+        for i in k+1:m
+            F[i, k] = F[i, k] // F[k, k]
+        end
+        for j = k+1:n
+            for i in k+1:m
+                F[i, j] = F[i, j] - F[i, k] * F[k, j]
+            end
+        end
+    end
+    check && LinearAlgebra.checknonsingular(info)
+    LU(F, p, convert(LinearAlgebra.BlasInt, info))
+end
+
 """
     convert_butcher_tableau_for_moment_kinetics(a, b)
 
@@ -27,11 +136,15 @@ that can be used to calculate an error estimate.
 Currently assumes the method is explicit, so `a` has no non-zero diagonal or
 upper-triangular elements.
 
-Returns an array `rk_coeffs` of size `n_rk_stages`x`n_rk_stages` where `size(a) =
+Returns an array `rk_coefs` of size `n_rk_stages`x`n_rk_stages` where `size(a) =
 (n_rk_stages, n_rk_stages)`.
 """
-function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true)
-    using_rationals = isa(a[1,1], Rational)
+function convert_butcher_tableau_for_moment_kinetics(a, b,
+                                                     a_implicit=zeros(size(a)),
+                                                     b_implicit=zeros(size(b));
+                                                     low_storage=true)
+    using_rationals = eltype(a) <: Rational || eltype(b) <: Rational || eltype(a_implicit) <: Rational || eltype(b_implicit) <: Rational
+    imex = any(a_implicit .!= 0)
     n_rk_stages = size(a, 1)
     if size(b, 1) > 1
         adaptive = true
@@ -50,291 +163,573 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true)
     # y_out are the same as y, but given as expressions in terms of y and f
     # k are the RHS evaluations as defined on the Wikipedia page
     # k_subs are the k evaluated in terms of y by back-substituting the definitions of y.
-    @variables y[1:n_rk_stages+1] y_out[1:n_rk_stages+1] k[1:n_rk_stages] k_subs[1:n_rk_stages]
-    y = Symbolics.scalarize(y)
-    y_out = Symbolics.scalarize(y_out)
+    @variables y_tilde[1:n_rk_stages+1] k[1:n_rk_stages] yn rk_coefs[1:n_rk_stages+1, 1:output_size]
+    @variables y[1:n_rk_stages] k_implicit[1:n_rk_stages] rk_coefs_implicit[1:n_rk_stages, 1:output_size+1]
+    y_tilde = Symbolics.scalarize(y_tilde)
     k = Symbolics.scalarize(k)
-    k_subs = Symbolics.scalarize(k_subs)
+    rk_coefs = Symbolics.scalarize(rk_coefs)
+    y = Symbolics.scalarize(y)
+    k_implicit = Symbolics.scalarize(k_implicit)
+    rk_coefs_implicit = Symbolics.scalarize(rk_coefs_implicit)
+
+    # Expressions defined using the 'standard' Butcher formulae
+    y_tilde_k_expressions = [
+                             yn + (i == 1 ? 0 : sum(a[i,j] * k[j] for j ∈ 1:i-1) + sum(a_implicit[i,j] * k_implicit[j] for j ∈ 1:i-1))
+                             for i ∈ 1:n_rk_stages
+                            ]
+    # Note that when using an IMEX scheme, if a_implicit[i,i]==0, then k_implicit[i] is
+    # actually an explicit RHS evaluation (evaluated using y_tilde[i]), and the explicit
+    # RHS k[i] will be evaluated using y_tilde[i] instead of y[i] so that we can store
+    # (y_tilde[i] + k_implicit[i]) in y[i], as a way to have k_implicit[i] available.
+    implicit_coefficient_is_zero = [imex && a_implicit[i,i] == 0 for i ∈ 1:n_rk_stages]
+    y_k_expressions = [
+                       y_tilde_k_expressions[i] + (implicit_coefficient_is_zero[i] ? 1 : a_implicit[i,i]) * k_implicit[i]
+                       for i ∈ 1:n_rk_stages
+                      ]
+    # Final entry of y_tilde_k_expressions is y^(n+1)
+    push!(y_tilde_k_expressions, yn +
+                                 sum(b[1,i] * k[i] for i ∈ 1:n_rk_stages) +
+                                 sum(b_implicit[1,i] * k_implicit[i] for i ∈ 1:n_rk_stages))
 
-    if using_rationals
-        k_subs[1] = (y[2] - y[1]) // a[2,1]
+    if adaptive
+        y_loworder = yn +
+                     sum(b[2,i] * k[i] for i ∈ 1:n_rk_stages) +
+                     sum(b_implicit[2,i] * k_implicit[i] for i ∈ 1:n_rk_stages)
+    end
+
+    # Define expressions for y_tilde[i] using the rk_coefs as used in moment_kinetics.
+    # We need a special case for an imex scheme with some a_implicit[i,i]=0, as for those
+    # entries we hacked y[i] to allow k_implicit[i] to be saved, and we need to use
+    # y_tilde[i] as the starting point for the forward-Euler derivative instead of y[i].
+    y_tilde_rk_coefs_expressions = [
+                                    yn, # i=1
+                                    (sum(rk_coefs[j,i-1] * y_tilde[j] for j ∈ 1:i-1)
+                                     + rk_coefs[i,i-1] * ((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1]) +
+                                     sum(rk_coefs_implicit[j,i] * y[j] for j ∈ 1:i-1)
+                                     for i ∈ 2:n_rk_stages+1)...
+                                   ]
+    # Note the 'implicit step' is treated specially, as the coefficient will be used to
+    # scale the timestep in the code, rather than as the coefficient of some version of
+    # y/y_tilde. rk_coefs_implicit[i,i] should end up being equal to a_implicit[i,i].
+    y_rk_coefs_expressions = [
+                              e + rk_coefs_implicit[i,i] * k_implicit[i]
+                              for (i,e) ∈ enumerate(y_tilde_rk_coefs_expressions[1:n_rk_stages])
+                             ]
+
+    # Substitute to eliminate y_tilde[i] from the expressions
+    y_tilde_rk_coefs_expressions = [
+                                    substitute(e, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1))
+                                    for e ∈ y_tilde_rk_coefs_expressions
+                                   ]
+    y_rk_coefs_expressions = [
+                              substitute(e, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1))
+                              for e ∈ y_rk_coefs_expressions
+                             ]
+
+
+    # Substitute to eliminate y[i] from the expressions
+    y_tilde_rk_coefs_expressions = [
+                                    substitute(e, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages))
+                                    for e ∈ y_tilde_rk_coefs_expressions
+                                   ]
+    y_rk_coefs_expressions = [
+                              substitute(e, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages))
+                              for e ∈ y_rk_coefs_expressions
+                             ]
+
+    if adaptive
+        y_rk_coefs_err = sum(rk_coefs[j,n_rk_stages+1] * y_tilde[j] for j ∈ 1:n_rk_stages+1) +
+                         sum(rk_coefs_implicit[j,n_rk_stages+2] * y[j] for j ∈ 1:n_rk_stages)
+        y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1))
+        y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages))
+    end
+
+    # Construct equations that can be solved for rk_coefs entries by equating the
+    # coefficients of each k[i], k_implicit[i] in the two sets of expressions
+    rk_coefs_equations = []
+    for (i, (rk_coefs_expr, Butcher_expr)) ∈ enumerate(zip(y_rk_coefs_expressions, y_k_expressions))
+        for j ∈ 1:n_rk_stages
+            lhs = Symbolics.coeff(rk_coefs_expr, k_implicit[j])
+            rhs = Symbolics.coeff(Butcher_expr, k_implicit[j])
+            if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+                push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0)
+            else
+                push!(rk_coefs_equations, lhs ~ rhs + 0)
+            end
+        end
+        if i == 1
+            # Explicit RK coefficients have no entries for i=1, because y_tilde[1]=yn
+            # always.
+            continue
+        end
+        lhs = Symbolics.coeff(rk_coefs_expr, yn)
+        rhs = Symbolics.coeff(Butcher_expr, yn)
+        if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+            push!(rk_coefs_equations, rk_coefs[1,i-1] ~ 0)
+        else
+            push!(rk_coefs_equations, lhs ~ rhs)
+        end
+        for j ∈ 1:n_rk_stages
+            lhs = Symbolics.coeff(rk_coefs_expr, k[j])
+            rhs = Symbolics.coeff(Butcher_expr, k[j])
+            if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+                push!(rk_coefs_equations, rk_coefs[j+1,i-1] ~ 0)
+            else
+                push!(rk_coefs_equations, lhs ~ rhs + 0)
+            end
+        end
+    end
+
+    # Include contribution from y_tilde[n_rk_stages+1]
+    i = n_rk_stages + 1
+    rk_coefs_expr = y_tilde_rk_coefs_expressions[n_rk_stages+1]
+    Butcher_expr = y_tilde_k_expressions[n_rk_stages+1]
+    for j ∈ 1:n_rk_stages
+        lhs = Symbolics.coeff(rk_coefs_expr, k_implicit[j])
+        rhs = Symbolics.coeff(Butcher_expr, k_implicit[j])
+        if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+            push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0)
+        else
+            push!(rk_coefs_equations, lhs ~ rhs + 0)
+        end
+    end
+    lhs = Symbolics.coeff(rk_coefs_expr, yn)
+    rhs = Symbolics.coeff(Butcher_expr, yn)
+    if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+        push!(rk_coefs_equations, rk_coefs[1,i-1] ~ 0)
     else
-        k_subs[1] = (y[2] - y[1]) / a[2,1]
+        push!(rk_coefs_equations, lhs ~ rhs)
     end
-    k_subs[1] = simplify(expand(k_subs[1]))
-    for i ∈ 2:n_rk_stages-1
-        if using_rationals
-            k_subs[i] = (y[i+1] - y[1] - sum(a[i+1,j]*k_subs[j] for j ∈ 1:i-1)) // a[i+1,i]
+    for j ∈ 1:n_rk_stages
+        lhs = Symbolics.coeff(rk_coefs_expr, k[j])
+        rhs = Symbolics.coeff(Butcher_expr, k[j])
+        if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+            push!(rk_coefs_equations, rk_coefs[j+1,i-1] ~ 0)
         else
-            k_subs[i] = (y[i+1] - y[1] - sum(a[i+1,j]*k_subs[j] for j ∈ 1:i-1)) / a[i+1,i]
+            push!(rk_coefs_equations, lhs ~ rhs + 0)
         end
-        k_subs[i] = simplify(expand(k_subs[i]))
     end
 
-    y_out[1] = y[1]
-    y_out[2] = y[1] + a[2,1] * k[1]
-    y_out[2] = simplify(expand(y_out[2]))
-    for i ∈ 3:n_rk_stages
-        y_out[i] = y[1] + sum(a[i,j]*k_subs[j] for j ∈ 1:i-2) + a[i,i-1]*k[i-1]
-        y_out[i] = simplify(expand(y_out[i]))
+    if adaptive
+        i = n_rk_stages + 1
+        lhs = Symbolics.coeff(y_rk_coefs_err, yn)
+        rhs = Symbolics.coeff(y_loworder, yn)
+        if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+            push!(rk_coefs_equations, rk_coefs[1,i] ~ 0)
+        else
+            push!(rk_coefs_equations, lhs ~ rhs)
+        end
+        for j ∈ 1:n_rk_stages
+            lhs = Symbolics.coeff(y_rk_coefs_err, k[j])
+            rhs = Symbolics.coeff(y_loworder, k[j])
+            if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+                push!(rk_coefs_equations, rk_coefs[j+1,i] ~ 0)
+            else
+                push!(rk_coefs_equations, lhs ~ rhs + 0)
+            end
+        end
+        i = n_rk_stages + 2
+        for j ∈ 1:n_rk_stages
+            lhs = Symbolics.coeff(y_rk_coefs_err, k_implicit[j])
+            rhs = Symbolics.coeff(y_loworder, k_implicit[j])
+            if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0
+                push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0)
+            else
+                push!(rk_coefs_equations, lhs ~ rhs + 0)
+            end
+        end
     end
 
-    y_out[n_rk_stages+1] = y[1] + sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1) +
-                           b[1,n_rk_stages]*k[n_rk_stages]
-    y_out[n_rk_stages+1] = simplify(expand(y_out[n_rk_stages+1]))
+    # Solve rk_coefs_equations for the rk_coefs entries
     if using_rationals
-        k_subs[n_rk_stages] = (y[n_rk_stages+1] - y[1]
-                               - sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1)) //
-                              b[1,n_rk_stages]
+        rk_coefs_values = my_solve_for(rk_coefs_equations, [rk_coefs..., rk_coefs_implicit...])
     else
-        k_subs[n_rk_stages] = (y[n_rk_stages+1] - y[1]
-                               - sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1)) /
-                              b[1,n_rk_stages]
-    end
-    k_subs[n_rk_stages] = simplify(expand(k_subs[n_rk_stages]))
-    #println("y_out")
-    #for i ∈ 1:n_rk_stages+1
-    #    println(y_out[i])
-    #end
-    #println("k")
-    #for i ∈ 1:n_rk_stages
-    #    println(k_subs[i])
-    #end
+        rk_coefs_values = Symbolics.solve_for(rk_coefs_equations, [rk_coefs..., rk_coefs_implicit...])
+    end
+    rk_coefs_implicit_values = reshape(rk_coefs_values[(n_rk_stages+1)*output_size+1:end], n_rk_stages, output_size+1)
+    rk_coefs_values = reshape(rk_coefs_values[1:(n_rk_stages+1)*output_size], n_rk_stages+1, output_size)
 
     if low_storage
         if using_rationals
-            rk_coeffs = zeros(Rational{Int64}, 3, output_size)
+            rk_coefs_out = zeros(Rational{Int64}, 3, output_size)
+            rk_coefs_implicit_out = zeros(Rational{Int64}, 3, output_size+1)
         else
-            rk_coeffs = zeros(3, output_size)
+            rk_coefs_out = zeros(3, output_size)
+            rk_coefs_implicit_out = zeros(3, output_size+1)
         end
         for i in 1:n_rk_stages
-            k_coeff = Symbolics.coeff(y_out[i+1], k[i])
-
             if i == 1
                 j = i
-                rk_coeffs[1,i] = Symbolics.coeff(y_out[i+1], y[j])
-                #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1])
-                #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i])
-                # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step'
-                rk_coeffs[1,i] -= k_coeff
-
-                # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i])
-                rk_coeffs[3,i] = k_coeff
-                #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i])
+                rk_coefs_out[1,i] = rk_coefs_values[1,i]
+                rk_coefs_out[3,i] = rk_coefs_values[2,i]
+                for j ∈ 3:n_rk_stages+1
+                    if rk_coefs_values[j,i] != 0
+                        error("Found non-zero coefficient where zero was expected for low-storage coefficients")
+                    end
+                end
             else
                 j = 1
-                rk_coeffs[1,i] = Symbolics.coeff(y_out[i+1], y[j])
-                for j ∈ 2:i-2
-                    if Symbolics.coeff(y_out[i+1], y[j]) != 0
+                rk_coefs_out[1,i] = rk_coefs_values[1,i]
+                for j ∈ 2:i-1
+                    if rk_coefs_values[j,i] != 0
                         error("Found non-zero coefficient where zero was expected for low-storage coefficients")
                     end
                 end
+                rk_coefs_out[2,i] = rk_coefs_values[i,i]
+                rk_coefs_out[3,i] = rk_coefs_values[i+1,i]
+                for j ∈ i+2:n_rk_stages+1
+                    if rk_coefs_values[j,i] != 0
+                        error("Found non-zero coefficient where zero was expected for low-storage coefficients")
+                    end
+                end
+            end
+        end
+        for i in 1:n_rk_stages
+            if i == 1
                 j = i
-                rk_coeffs[2,i] = Symbolics.coeff(y_out[i+1], y[j])
-                #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1])
-                #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i])
-                # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step'
-                rk_coeffs[2,i] -= k_coeff
-
-                # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i])
-                rk_coeffs[3,i] = k_coeff
-                #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i])
+                rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i]
+                rk_coefs_implicit_out[3,i] = rk_coefs_implicit_values[2,i]
+                for j ∈ 3:n_rk_stages
+                    if rk_coefs_implicit_values[j,i] != 0
+                        error("Found non-zero coefficient where zero was expected for low-storage coefficients")
+                    end
+                end
+            else
+                j = 1
+                rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i]
+                for j ∈ 2:i-1
+                    if rk_coefs_implicit_values[j,i] != 0
+                        error("Found non-zero coefficient where zero was expected for low-storage coefficients")
+                    end
+                end
+                rk_coefs_implicit_out[2,i] = rk_coefs_implicit_values[i,i]
+                if i == n_rk_stages
+                    rk_coefs_implicit_out[3,i] = 0
+                else
+                    rk_coefs_implicit_out[3,i] = rk_coefs_implicit_values[i+1,i]
+                end
+                for j ∈ i+2:n_rk_stages
+                    if rk_coefs_implicit_values[j,i] != 0
+                        error("Found non-zero coefficient where zero was expected for low-storage coefficients")
+                    end
+                end
             end
         end
-
-        #for i ∈ 1:n_rk_stages
-        #    println("k$i = ", k_subs[i])
-        #end
         if adaptive
-            error_coefficients = b[2,:] .- b[1,:]
-            #println("error_coefficients=", error_coefficients)
-            #println("error coefficients ", error_coefficients)
-            y_err = sum(error_coefficients[j]*k_subs[j] for j ∈ 1:n_rk_stages)
-            y_err = simplify(expand(y_err))
-
-            # Use final column of rk_coeffs to store the coefficients used to calculate the truncation
-            # error estimate
+            i = n_rk_stages+1
             j = 1
-            rk_coeffs[1,n_rk_stages+1] = Symbolics.coeff(y_err, y[j])
-            for j ∈ 2:n_rk_stages-1
-                if Symbolics.coeff(y_err, y[j]) != 0
-                    error("Found non-zero error coefficient where zero was expected for low-storage coefficients")
+            rk_coefs_out[1,i] = rk_coefs_values[1,i]
+            for j ∈ 2:i-2
+                if rk_coefs_values[j,i] != 0
+                    error("Found non-zero coefficient where zero was expected for low-storage coefficients")
                 end
             end
-            j = n_rk_stages
-            rk_coeffs[2,n_rk_stages+1] = Symbolics.coeff(y_err, y[j])
-            j = n_rk_stages + 1
-            rk_coeffs[3,n_rk_stages+1] = Symbolics.coeff(y_err, y[j])
-        end
-    else
-        if using_rationals
-            rk_coeffs = zeros(Rational{Int64}, n_rk_stages+1, output_size)
-        else
-            rk_coeffs = zeros(n_rk_stages+1, output_size)
-        end
-        for i in 1:n_rk_stages
-            k_coeff = Symbolics.coeff(y_out[i+1], k[i])
+            rk_coefs_out[2,i] = rk_coefs_values[i-1,i]
+            rk_coefs_out[3,i] = rk_coefs_values[i,i]
 
-            for j ∈ 1:i
-                rk_coeffs[j,i] = Symbolics.coeff(y_out[i+1], y[j])
-            end
-            #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1])
-            #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i])
-            # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step'
-            rk_coeffs[i,i] -= k_coeff
-
-            # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i])
-            rk_coeffs[i+1,i] = k_coeff
-            #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i])
-        end
-
-        #for i ∈ 1:n_rk_stages
-        #    println("k$i = ", k_subs[i])
-        #end
-        if adaptive
-            error_coefficients = b[2,:] .- b[1,:]
-            #println("error_coefficients=", error_coefficients)
-            #println("error coefficients ", error_coefficients)
-            y_err = sum(error_coefficients[j]*k_subs[j] for j ∈ 1:n_rk_stages)
-            y_err = simplify(expand(y_err))
-
-            # Use final column of rk_coeffs to store the coefficients used to calculate the truncation
-            # error estimate
-            for j ∈ 1:n_rk_stages+1
-                rk_coeffs[j,n_rk_stages+1] = Symbolics.coeff(y_err, y[j])
+            j = 1
+            rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i]
+            for j ∈ 2:i-2
+                if rk_coefs_implicit_values[j,i] != 0
+                    error("Found non-zero coefficient where zero was expected for low-storage coefficients")
+                end
             end
+            j = n_rk_stages
+            rk_coefs_implicit_out[2,i] = rk_coefs_implicit_values[j,i]
+            rk_coefs_implicit_out[3,i] = 0 #rk_coefs_implicit_values[j+1,i]
         end
+    else
+        rk_coefs_out = rk_coefs_values
+        rk_coefs_implicit_out = rk_coefs_implicit_values
     end
 
-    return rk_coeffs
+    return rk_coefs_out, rk_coefs_implicit_out, implicit_coefficient_is_zero
+end
+function convert_butcher_tableau_for_moment_kinetics(a::Matrix{Rational{Int64}},
+                                                     b::Matrix{Rational{Int64}},
+                                                     a_implicit::Matrix{Rational{Int64}}=zeros(Rational{Int64}, size(a)),
+                                                     b_implicit::Matrix{Rational{Int64}}=zeros(Rational{Int64}, size(b));
+                                                     low_storage=true)
+    a = Matrix{Rational{BigInt}}(a)
+    b = Matrix{Rational{BigInt}}(b)
+    a_implicit = Matrix{Rational{BigInt}}(a_implicit)
+    b_implicit = Matrix{Rational{BigInt}}(b_implicit)
+    return convert_butcher_tableau_for_moment_kinetics(a, b, a_implicit, b_implicit;
+                                                       low_storage=low_storage)
 end
 
-function convert_rk_coeffs_to_butcher_tableau(rkcoeffs::AbstractArray{T,N}) where {T,N}
-    adaptive = (abs(sum(rkcoeffs[:,end])) < 1.0e-13)
-    low_storage = size(rkcoeffs, 1) == 3
+function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N},
+                                             adaptive,
+                                             rk_coefs_implicit=zeros(T, (size(rk_coefs, 1) == 3 ? 3 : size(rk_coefs, 1) - 1), size(rk_coefs, 2) + 1), # low-storage layout indexes rows 1:3
+                                             implicit_coefficient_is_zero=nothing
+                                            ) where {T,N}
+    using_rationals = eltype(rk_coefs) <: Rational || eltype(rk_coefs_implicit) <: Rational
+    low_storage = size(rk_coefs, 1) == 3
     if adaptive
-        n_rk_stages = size(rkcoeffs, 2) - 1
+        n_rk_stages = size(rk_coefs, 2) - 1
     else
-        n_rk_stages = size(rkcoeffs, 2)
+        n_rk_stages = size(rk_coefs, 2)
+    end
+    if implicit_coefficient_is_zero === nothing
+        implicit_coefficient_is_zero = zeros(Bool, n_rk_stages)
     end
 
-    @variables y[1:n_rk_stages+1] y_out[1:n_rk_stages+1] k[1:n_rk_stages] k_subs[1:n_rk_stages]
-    y = Symbolics.scalarize(y)
+    @variables y_tilde[1:n_rk_stages+1] yn k[1:n_rk_stages]
+    y_tilde = Symbolics.scalarize(y_tilde)
     k = Symbolics.scalarize(k)
+    @variables y[1:n_rk_stages] k_implicit[1:n_rk_stages]
+    y = Symbolics.scalarize(y)
+    k_implicit = Symbolics.scalarize(k_implicit)
 
     if low_storage
-        for i ∈ 1:n_rk_stages
-            y[i+1] = rkcoeffs[1,i]*y[1] + rkcoeffs[2,i]*y[i] + rkcoeffs[3,i]*(y[i] + k[i])
-        end
+        y_tilde_expressions = [
+                               yn,
+                               (rk_coefs[1,i-1]*y_tilde[1] + rk_coefs[2,i-1]*y_tilde[i-1]
+                                + rk_coefs[3,i-1]*((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1])
+                                + rk_coefs_implicit[1,i]*y[1] + rk_coefs_implicit[2,i]*y[i-1]
+                                for i ∈ 2:n_rk_stages+1)...
+                              ]
+        y_expressions = [
+                         y_tilde_expressions[i] + rk_coefs_implicit[3,i] * k_implicit[i]
+                         for i ∈ 1:n_rk_stages
+                        ]
     else
-        for i ∈ 1:n_rk_stages
-            y[i+1] = sum(rkcoeffs[j,i]*y[j] for j ∈ 1:i) + rkcoeffs[i+1,i]*(y[i] + k[i])
-            y[i+1] = simplify(expand(y[i+1]))
+        y_tilde_expressions = [
+                               yn,
+                               (sum(rk_coefs[j,i-1]*y_tilde[j] for j ∈ 1:i-1)
+                                + rk_coefs[i,i-1]*((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1])
+                                + sum(rk_coefs_implicit[j,i]*y[j] for j ∈ 1:i-1)
+                                for i ∈ 2:n_rk_stages+1)...
+                              ]
+        y_expressions = [
+                         y_tilde_expressions[i] + rk_coefs_implicit[i,i] * k_implicit[i]
+                         for i ∈ 1:n_rk_stages
+                        ]
+    end
+    y_tilde_expressions = [simplify(expand(e)) for e ∈ y_tilde_expressions]
+    y_expressions = [simplify(expand(e)) for e ∈ y_expressions]
+    if adaptive
+        if low_storage
+            i = n_rk_stages + 1
+            y_loworder = rk_coefs[1,i]*y_tilde[1] + rk_coefs[2,i]*y_tilde[n_rk_stages] + rk_coefs[3,i]*y_tilde[n_rk_stages+1] +
+                         rk_coefs_implicit[1,i+1]*y[1] + rk_coefs_implicit[2,i+1]*y[n_rk_stages-1] + rk_coefs_implicit[3,i+1]*y[n_rk_stages]
+        else
+            y_loworder = sum(rk_coefs[j,n_rk_stages+1]*y_tilde[j] for j ∈ 1:n_rk_stages+1) +
+                         sum(rk_coefs_implicit[j,n_rk_stages+2]*y[j] for j ∈ 1:n_rk_stages)
         end
+        y_loworder = simplify(expand(y_loworder))
+    end
+
+    # Set up equations to solve for each y_tilde[i] and y[i] in terms of k[i] and
+    # k_implicit[i]
+    y_tilde_equations = [y_tilde[i] ~ y_tilde_expressions[i] for i ∈ 1:n_rk_stages+1]
+    y_equations = [y[i] ~ y_expressions[i] for i ∈ 1:n_rk_stages]
+    equations = vcat(y_tilde_equations, y_equations)
+    if using_rationals
+        expressions = my_solve_for2(equations, vcat(y_tilde, y))
+    else
+        expressions = Symbolics.solve_for(equations, vcat(y_tilde, y))
     end
-    #for i ∈ 1:n_rk_stages+1
-    #    println("i=$i, y[$i]=", y[i])
-    #end
+    y_tilde_k_expressions = expressions[1:n_rk_stages+1]
+    y_k_expressions = expressions[n_rk_stages+2:end]
 
     if adaptive
         b = zeros(T, 2, n_rk_stages)
+        b_implicit = zeros(T, 2, n_rk_stages)
     else
         b = zeros(T, 1, n_rk_stages)
+        b_implicit = zeros(T, 1, n_rk_stages)
     end
 
     for j ∈ 1:n_rk_stages
-        b[1, j] = Symbolics.coeff(y[n_rk_stages+1], k[j])
+        b[1, j] = Symbolics.coeff(y_tilde_k_expressions[n_rk_stages+1], k[j])
+        b_implicit[1, j] = Symbolics.coeff(y_tilde_k_expressions[n_rk_stages+1], k_implicit[j])
     end
     if adaptive
-        if low_storage
-            yerr = rkcoeffs[1,n_rk_stages+1]*y[1] +
-                   rkcoeffs[2,n_rk_stages+1]*y[n_rk_stages] +
-                   rkcoeffs[3,n_rk_stages+1]*y[n_rk_stages+1]
-        else
-            yerr = sum(rkcoeffs[j,n_rk_stages+1]*y[j] for j ∈ 1:n_rk_stages+1)
-        end
-        error_coeffs = zeros(T, n_rk_stages)
+        y_k_loworder = substitute(y_loworder, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1))
+        y_k_loworder = substitute(y_k_loworder, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages))
+        y_k_loworder = simplify(expand(y_k_loworder))
         for j ∈ 1:n_rk_stages
-            error_coeffs[j] = Symbolics.coeff(yerr, k[j])
+            b[2,j] = Symbolics.coeff(y_k_loworder, k[j])
+            b_implicit[2,j] = Symbolics.coeff(y_k_loworder, k_implicit[j])
         end
-        #println("error_coeffs=", error_coeffs)
-        # b[2,:] is the lower-order solution
-        @. b[2,:] = error_coeffs + b[1,:]
     end
 
     a = zeros(T, n_rk_stages, n_rk_stages)
+    a_implicit = zeros(T, n_rk_stages, n_rk_stages)
     for i ∈ 1:n_rk_stages
         for j ∈ 1:n_rk_stages
-            a[i,j] = Symbolics.coeff(y[i], k[j])
+            a[i,j] = Symbolics.coeff(y_k_expressions[i], k[j])
+            if j == i && implicit_coefficient_is_zero[i]
+                a_implicit[i,j] = 0
+            else
+                a_implicit[i,j] = Symbolics.coeff(y_k_expressions[i], k_implicit[j])
+            end
         end
     end
 
-    return a, b
+    return a, b, a_implicit, b_implicit
 end
 
-function convert_and_check_butcher_tableau(name, a, b; low_storage=true)
+function convert_and_check_butcher_tableau(name, a, b,
+                                           a_implicit=zeros(eltype(a), size(a)),
+                                           b_implicit=zeros(eltype(b), size(b));
+                                           low_storage=true)
+    imex = any(a_implicit .!= 0) || any(b_implicit .!= 0)
+
     println(name)
-    rk_coeffs = convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=low_storage)
+    rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero =
+        convert_butcher_tableau_for_moment_kinetics(a, b, a_implicit, b_implicit;
+                                                    low_storage=low_storage)
     print("a="); display(a)
     print("b="); display(b)
-    print("rk_coeffs="); display(rk_coeffs)
+    if imex
+        print("a_implicit="); display(a_implicit)
+        print("b_implicit="); display(b_implicit)
+    end
+    print("rk_coefs="); display(rk_coefs)
+    if imex
+        print("rk_coefs_implicit="); display(rk_coefs_implicit)
+    end
+    print("rk_coefs(Float64)="); display(Float64.(rk_coefs))
+    if imex
+        print("rk_coefs_implicit(Float64)="); display(Float64.(rk_coefs_implicit))
+    end
     println("a=$a")
     println("b=$b")
-    println("rk_coeffs=$rk_coeffs")
+    if imex
+        println("a_implicit=$a_implicit")
+        println("b_implicit=$b_implicit")
+    end
+    println("rk_coefs=$rk_coefs")
+    if imex
+        println("rk_coefs_implicit=$rk_coefs_implicit")
+        println("implicit_coefficient_is_zero=$implicit_coefficient_is_zero")
+    end
     println()
 
-    check_end = size(rk_coeffs, 2)
+    check_end = size(rk_coefs, 2)
     if size(b, 1) > 1
         # Adaptive timestep
-        if abs(sum(rk_coeffs[:,end])) > 1.0e-13
-            error("Sum of error coefficients should be 0")
+        error_sum = sum(rk_coefs[:,end]) + sum(rk_coefs_implicit[:,end])
+        if abs(error_sum - 1) > 1.0e-13
+            error("Sum of loworder coefficients should be 1. Got ", error_sum, " ≈ ", Float64(error_sum))
         end
         check_end -= 1
+        adaptive = true
+    else
+        adaptive = false
     end
     for i ∈ 1:check_end
-        if abs(sum(rk_coeffs[:,i]) - 1) > 1.0e-13
-            error("Sum of RK coefficients should be 1 for each stage")
+        if low_storage
+            error_sum = sum(rk_coefs[:,i]) + sum(rk_coefs_implicit[:,i+1])
+        else
+            error_sum = sum(rk_coefs[:,i]) + sum(rk_coefs_implicit[1:i,i+1])
+        end
+        if abs(error_sum - 1) > 1.0e-13
+            error("Sum of RK coefficients should be 1 for each stage. Got ", error_sum, " ≈ ", Float64(error_sum))
+        end
+    end
+    if imex
+        check_end_implicit = size(rk_coefs_implicit, 2)
+        if size(b_implicit, 1) > 1
+            # Adaptive timestep
+            check_end_implicit -= 1
+        end
+        for i ∈ 1:check_end_implicit - 1
+            if !all(abs.(rk_coefs_implicit[i+1:end,i]) .< 1.0e-13)
+                error("Implicit RK coefficients should be 0 for j>i. Got ", rk_coefs_implicit[i+1:end,i], " ≈ ", Float64.(rk_coefs_implicit[i+1:end,i]))
+            end
+        end
+        for i ∈ 1:check_end_implicit - 1
+            if a_implicit[i,i] == 0
+                if rk_coefs_implicit[i,i] != 1
+                    error("Diagonal RK coefficient should be 1 when a_implicit[$i,$i]=0, got rk_coefs_implicit[$i,$i]=", rk_coefs_implicit[i,i])
+                end
+            elseif abs(rk_coefs_implicit[i,i] - a_implicit[i,i]) > 1.0e-13
+                error("Diagonal RK coefficient should be equal to a_implicit[i,i] for each stage. Got rk_coefs_implicit[$i,$i]=", rk_coefs_implicit[i,i], " a_implicit[$i,$i]=", a_implicit[i,i])
+            end
         end
     end
 
     # Consistency check: converting back should give the original a, b.
-    a_check, b_check = convert_rk_coeffs_to_butcher_tableau(rk_coeffs)
-    #println("check?? ", a_check, " ", b_check)
+    a_check, b_check, a_check_implicit, b_check_implicit =
+        convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero)
 
-    if isa(a[1], Real)
-        if maximum(abs.(a_check .- a)) > 1.0e-13
-            error("Converting rk_coeffs back to Butcher tableau gives different 'a':\n"
+    if eltype(a) <: Rational
+        if a_check != a
+            error("Converting rk_coefs back to Butcher tableau gives different 'a':\n"
                   * "Original: $a\n"
                   * "New:      $a_check")
         end
-        if maximum(abs.(b_check .- b)) > 1.0e-13
-            error("Converting rk_coeffs back to Butcher tableau gives different 'b':\n"
+        if b_check != b
+            error("Converting rk_coefs back to Butcher tableau gives different 'b':\n"
                   * "Original: $b\n"
                   * "New:      $b_check")
         end
     else
-        if a_check != a
-            error("Converting rk_coeffs back to Butcher tableau gives different 'a':\n"
+        if maximum(abs.(a_check .- a)) > 1.0e-13
+            error("Converting rk_coefs back to Butcher tableau gives different 'a':\n"
                   * "Original: $a\n"
                   * "New:      $a_check")
         end
-        if b_check != b
-            error("Converting rk_coeffs back to Butcher tableau gives different 'b':\n"
+        if maximum(abs.(b_check .- b)) > 1.0e-13
+            error("Converting rk_coefs back to Butcher tableau gives different 'b':\n"
                   * "Original: $b\n"
                   * "New:      $b_check")
         end
     end
+    if eltype(a_implicit) <: Rational
+        if a_check_implicit != a_implicit
+            error("Converting rk_coefs back to Butcher tableau gives different 'a_implicit':\n"
+                  * "Original: $a_implicit\n"
+                  * "New:      $a_check_implicit")
+        end
+        if b_check_implicit != b_implicit
+            error("Converting rk_coefs back to Butcher tableau gives different 'b_implicit':\n"
+                  * "Original: $b_implicit\n"
+                  * "New:      $b_check_implicit")
+        end
+    else
+        if maximum(abs.(a_check_implicit .- a_implicit)) > 1.0e-13
+            error("Converting rk_coefs back to Butcher tableau gives different 'a_implicit':\n"
+                  * "Original: $a_implicit\n"
+                  * "New:      $a_check_implicit")
+        end
+        if maximum(abs.(b_check_implicit .- b_implicit)) > 1.0e-13
+            error("Converting rk_coefs back to Butcher tableau gives different 'b_implicit':\n"
+                  * "Original: $b_implicit\n"
+                  * "New:      $b_check_implicit")
+        end
+    end
 end
 
-function convert_and_check_rk_coeffs(name, rk_coeffs)
+function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false,
+                                    rk_coefs_implicit=zeros(eltype(rk_coefs),
+                                                            size(rk_coefs, 1),
+                                                            size(rk_coefs, 2) + 1),
+                                    implicit_coefficient_is_zero=nothing)
+    imex = any(rk_coefs_implicit .!= 0)
+
     println(name)
 
-    print("rk_coeffs="); display(rk_coeffs)
-    a, b = convert_rk_coeffs_to_butcher_tableau(rk_coeffs)
+    print("rk_coefs="); display(rk_coefs)
+    if imex
+        print("rk_coefs_implicit="); display(rk_coefs_implicit)
+    end
+    a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero)
     print("a="); display(a)
     print("b="); display(b)
+    if imex
+        print("a_implicit="); display(a_implicit)
+        print("b_implicit="); display(b_implicit)
+    end
     println("a=$a")
     println("b=$b")
+    if imex
+        println("a_implicit=$a_implicit")
+        println("b_implicit=$b_implicit")
+    end
     println()
 end
 
@@ -514,6 +909,12 @@ convert_and_check_butcher_tableau(
     construct_fekete_3rd_order(4)...
    )
 
+convert_and_check_butcher_tableau(
+    "Fekete 4(3) not low-storage",
+    construct_fekete_3rd_order(4)...;
+    low_storage=false
+   )
+
 """
     construct_fekete_2nd_order(nstage)
 
@@ -561,7 +962,7 @@ convert_and_check_butcher_tableau(
     construct_fekete_2nd_order(2)...
    )
 
-convert_and_check_rk_coeffs(
+convert_and_check_rk_coefs(
     "mk's ssprk4",
     [1//2 0    2//3 0   ;
      1//2 1//2 0    0   ;
@@ -570,7 +971,7 @@ convert_and_check_rk_coeffs(
      0    0    0    1//2],
    )
 
-convert_and_check_rk_coeffs(
+convert_and_check_rk_coefs(
     "mk's ssprk3",
     [0  3//4 1//3;
      1  0    0   ;
@@ -578,9 +979,90 @@ convert_and_check_rk_coeffs(
      0  0    2//3],
    )
 
-convert_and_check_rk_coeffs(
+convert_and_check_rk_coefs(
     "mk's ssprk2",
     [0 1//2;
      0 0   ;
      1 1//2],
    )
+
+println("\n\nIMEX methods\n============\n")
+
+# 4th-order, 7-stage IMEX method 'ARK4(3)7L[2]SA₁' from Kennedy & Carpenter 2019
+# (https://doi.org/10.1016/j.apnum.2018.10.007)
+convert_and_check_butcher_tableau(
+    "KennedyCarpenterARK437",
+    Rational{BigInt}[0                              0                              0                              0                              0                             0                             0;
+                     247//1000                      0                              0                              0                              0                             0                             0;
+                     247//4000                      2694949928731//7487940209513   0                              0                              0                             0                             0;
+                     464650059369//8764239774964    878889893998//2444806327765   -952945855348//12294611323341   0                              0                             0                             0;
+                     476636172619//8159180917465   -1271469283451//7793814740893  -859560642026//4356155882851    1723805262919//4571918432560   0                             0                             0;
+                     6338158500785//11769362343261 -4970555480458//10924838743837  3326578051521//2647936831840  -880713585975//1841400956686   -1428733748635//8843423958496  0                             0;
+                     760814592956//3276306540349    760814592956//3276306540349   -47223648122716//6934462133451  71187472546993//9669769126921 -13330509492149//9695768672337 11565764226357//8513123442827 0;
+    ],
+    Rational{BigInt}[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000  ;
+                     0 0 4469248916618//8635866897933  -621260224600//4094290005349    696572312987//2942599194819  1532940081127//5565293938103 2441//20000],
+    Rational{BigInt}[0                               0                              0                              0                              0                             0                            0          ;
+                     1235//10000                     1235//10000                    0                              0                              0                             0                            0          ;
+                     624185399699//4186980696204     624185399699//4186980696204    1235//10000                    0                              0                             0                            0          ;
+                     1258591069120//10082082980243   1258591069120//10082082980243 -322722984531//8455138723562    1235//10000                    0                             0                            0          ;
+                     -436103496990//5971407786587   -436103496990//5971407786587   -2689175662187//11046760208243  4431412449334//12995360898505  1235//10000                   0                            0          ;
+                     -2207373168298//14430576638973 -2207373168298//14430576638973  242511121179//3358618340039    3145666661981//7780404714551   5882073923981//14490790706663 1235//10000                  0          ;
+                     0                               0                              9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217  2273837961795//8368240463276 1235//10000;
+                    ],
+    Rational{BigInt}[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000  ;
+                     0 0 4469248916618//8635866897933  -621260224600//4094290005349    696572312987//2942599194819  1532940081127//5565293938103 2441//20000],
+    ; low_storage=false)
+
+# The 5th-order KennedyCarpenterARK548 method seems to be missing the 8th row of a_implicit
+# coefficients in the Kennedy & Carpenter 2019 paper, so the tableau below is not correct.
+## 5th-order, 8-stage IMEX method 'ARK5(4)8L[2]SA₂' from Kennedy & Carpenter 2019
+## (https://doi.org/10.1016/j.apnum.2018.10.007)
+#convert_and_check_butcher_tableau(
+#    "KennedyCarpenterARK548",
+#    Rational{BigInt}[ 0                               0                             0                              0                              0                              0                              0                            0;
+#                      4//9                            0                             0                              0                              0                              0                              0                            0;
+#                      1//9                            1183333538310//1827251437969  0                              0                              0                              0                              0                            0;
+#                      895379019517//9750411845327     477606656805//13473228687314 -112564739183//9373365219272    0                              0                              0                              0                            0;
+#                      -4458043123994//13015289567637 -2500665203865//9342069639922  983347055801//8893519644487    2185051477207//2551468980502   0                              0                              0                            0;
+#                      -167316361917//17121522574472   1605541814917//7619724128744  991021770328//13052792161721   2342280609577//11279663441611  3012424348531//12792462456678  0                              0                            0;
+#                      6680998715867//14310383562358   5029118570809//3897454228471  2415062538259//6382199904604  -3924368632305//6964820224454  -4331110370267//15021686902756 -3944303808049//11994238218192  0                            0;
+#                      2193717860234//3570523412979    2193717860234//3570523412979  5952760925747//18750164281544 -4412967128996//6196664114337   4151782504231//36106512998704  572599549169//6265429158920   -457874356192//11306498036315 0;
+#                    ],
+#    Rational{BigInt}[ 0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999  2//9                        ;
+#                      0 0 520639020421//8300446712847   4550235134915//17827758688493 1482366381361//6201654941325  5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926;
+#                    ],
+#    Rational{BigInt}[ 0                             0                             0                             0                              0                           0                            0    0   ;
+#                      2//9                          2//9                          0                             0                              0                           0                            0    0   ;
+#                      2366667076620//8822750406821  2366667076620//8822750406821  2//9                          0                              0                           0                            0    0   ;
+#                     -257962897183//4451812247028  -257962897183//4451812247028   128530224461//14379561246022  2//9                           0                           0                            0    0   ;
+#                     -486229321650//11227943450093 -486229321650//11227943450093 -225633144460//6633558740617   1741320951451//6824444397158   2//9                        0                            0    0   ;
+#                      621307788657//4714163060173   621307788657//4714163060173  -125196015625//3866852212004   940440206406//7593089888465    961109811699//6734810228204 2//9                         0    0   ;
+#                      2036305566805//6583108094622  2036305566805//6583108094622 -3039402635899//4450598839912 -1829510709469//31102090912115 -286320471013//6931253422520 8651533662697//9642993110008 2//9 0   ;
+#                      0                             0                             0                             0                              0                           0                            0    2//9;
+#                    ],
+#    Rational{BigInt}[ 0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999  2//9                        ;
+#                      0 0 520639020421//8300446712847   4550235134915//17827758688493 1482366381361//6201654941325  5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926;
+#                    ],
+#   ; low_storage=false)
+
+# 3rd-order, 4-stage IMEX method from Kennedy & Carpenter 2003
+# (https://doi.org/10.1016/S0168-9274(02)00138-1,
+# https://ntrs.nasa.gov/api/citations/20010075154/downloads/20010075154.pdf)
+convert_and_check_butcher_tableau(
+    "KennedyCarpenterARK324",
+    Rational{BigInt}[0                              0                            0                               0;
+                     1767732205903//2027836641118   0                            0                               0;
+                     5535828885825//10492691773637  788022342437//10882634858940 0                               0;
+                     6485989280629//16251701735622 -4246266847089//9704473918619 10755448449292//10357097424841  0;
+    ],
+    Rational{BigInt}[1471266399579//7840856788654  -4482444167858//7529755066697   11266239266428//11593286722821 1767732205903//4055673282236;
+                     2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117  2193209047091//5459859503100],
+    Rational{BigInt}[0                              0                            0                               0                           ;
+                     1767732205903//4055673282236   1767732205903//4055673282236 0                               0                           ;
+                     2746238789719//10658868560708 -640167445237//6845629431997  1767732205903//4055673282236    0                           ;
+                     1471266399579//7840856788654  -4482444167858//7529755066697 11266239266428//11593286722821  1767732205903//4055673282236;
+                    ],
+    Rational{BigInt}[1471266399579//7840856788654  -4482444167858//7529755066697   11266239266428//11593286722821 1767732205903//4055673282236;
+                     2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117  2193209047091//5459859503100],
+    ; low_storage=false)
diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl
index 5ca7add5a..dd77ab06f 100644
--- a/util/test-rk-timestep.jl
+++ b/util/test-rk-timestep.jl
@@ -6,19 +6,34 @@ function f(y)
     return y
     #return 1.0
 end
+function f_implicit(y, dt)
+    # Return f evaluated at y_out, the end point of a 'backward Euler' step of size dt
+    # starting from y, i.e. (y_out - y)/dt = f(y_out). The closed form relies on f(y) = y:
+    #   y_out - y = dt * f(y_out) = dt * y_out
+    # ⇒ y_out = y / (1 - dt)   (so dt = 0 gives plain f(y))
+    return f(y/(1 - dt))
+end
+function backward_euler(y, dt)
+    # Return y_out from a 'backward Euler' step of size dt starting from y, i.e. the
+    # solution of (y_out - y)/dt = f(y_out). The closed form relies on f(y) = y:
+    #   y_out - y = dt * f(y_out) = dt * y_out
+    # ⇒ y_out = y / (1 - dt)   (so dt = 0 returns y unchanged)
+    return y / (1 - dt)
+end
 y0 = 1.0
 nsteps = 100 * multiplier
 
 t = [i*dt for i ∈ 0:nsteps]
 analytic = @. y0*exp(t)
+analytic_implicit = @. y0*exp(2*t)
 #analytic = @. 1.0 + t
 
-function rk_advance(rk_coeffs, y0, dt, nsteps)
-    n_rk_stages = size(rk_coeffs, 1) - 1
-    #println("n_rk_stages=$n_rk_stages, ", size(rk_coeffs))
+function rk_advance_explicit(rk_coefs, y0, dt, nsteps)
+    n_rk_stages = size(rk_coefs, 1) - 1
+    #println("n_rk_stages=$n_rk_stages, ", size(rk_coefs))
     yscratch = zeros(n_rk_stages + 1)
     yscratch[1] = y0
-    adaptive = size(rk_coeffs, 2) > n_rk_stages
+    adaptive = size(rk_coefs, 2) > n_rk_stages
 
     result = zeros(nsteps+1)
     result[1] = y0
@@ -28,8 +43,58 @@ function rk_advance(rk_coeffs, y0, dt, nsteps)
     for it ∈ 1:nsteps
         for istage ∈ 1:n_rk_stages
             yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage])
-            this_coeffs = rk_coeffs[:,istage]
-            yscratch[istage+1] = sum(this_coeffs[i]*yscratch[i] for i ∈ 1:istage+1)
+            this_coefs = rk_coefs[:,istage]
+            yscratch[istage+1] = sum(this_coefs[i]*yscratch[i] for i ∈ 1:istage+1)
+        end
+        #k1 = 2*(yscratch[2] - yscratch[1])
+        #k2 = 2*(yscratch[3] - yscratch[1])
+        #k3 = yscratch[4] - yscratch[1]
+        #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3
+        #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4)
+        if adaptive
+            loworder = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1)
+            error[it+1] = loworder - yscratch[end]
+        end
+        yscratch[1] = yscratch[end]
+        result[it+1] = yscratch[end]
+    end
+
+    return result, error
+end
+
+function rk_advance(rk_coefs, y0, dt, nsteps, rk_coefs_implicit=nothing, implicit_coefficient_is_zero=nothing)
+
+    n_rk_stages = size(rk_coefs, 1) - 1
+
+    if rk_coefs_implicit === nothing && implicit_coefficient_is_zero === nothing
+        rk_coefs_implicit = zeros(n_rk_stages, n_rk_stages + 2)
+        implicit_coefficient_is_zero = zeros(Bool, n_rk_stages)
+    end
+
+    #println("n_rk_stages=$n_rk_stages, ", size(rk_coefs))
+    yscratch = zeros(n_rk_stages + 1)
+    yscratch_implicit = zeros(n_rk_stages)
+    yscratch[1] = y0
+    adaptive = size(rk_coefs, 2) > n_rk_stages
+
+    result = zeros(nsteps+1)
+    result[1] = y0
+
+    error = zeros(nsteps+1)
+
+    for it ∈ 1:nsteps
+        for istage ∈ 1:n_rk_stages
+            if implicit_coefficient_is_zero[istage]
+                yscratch_implicit[istage] = yscratch[istage] + dt*f_implicit(yscratch[istage], 0.0)
+                yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage])
+            else
+                yscratch_implicit[istage] = backward_euler(yscratch[istage], dt*rk_coefs_implicit[istage,istage])
+                yscratch[istage+1] = yscratch_implicit[istage] + dt*f(yscratch_implicit[istage])
+            end
+            this_coefs = rk_coefs[:,istage]
+            this_coefs_implicit = rk_coefs_implicit[:,istage+1]
+            yscratch[istage+1] = sum(this_coefs[i]*yscratch[i] for i ∈ 1:istage+1) +
+                                 sum(this_coefs_implicit[i]*yscratch_implicit[i] for i ∈ 1:istage)
         end
         #k1 = 2*(yscratch[2] - yscratch[1])
         #k2 = 2*(yscratch[3] - yscratch[1])
@@ -37,7 +102,9 @@ function rk_advance(rk_coeffs, y0, dt, nsteps)
         #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3
         #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4)
         if adaptive
-            error[it+1] = sum(rk_coeffs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1)
+            loworder = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) +
+                       sum(rk_coefs_implicit[i, n_rk_stages+2]*yscratch_implicit[i] for i ∈ 1:n_rk_stages)
+            error[it+1] = loworder - yscratch[end]
         end
         yscratch[1] = yscratch[end]
         result[it+1] = yscratch[end]
@@ -46,8 +113,8 @@ function rk_advance(rk_coeffs, y0, dt, nsteps)
     return result, error
 end
 
-function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps)
-    n_rk_stages = size(rk_coeffs, 2)
+function rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps)
+    n_rk_stages = size(rk_coefs, 2)
     println("check n_rk_stages=$n_rk_stages")
 
     yscratch = zeros(n_rk_stages + 1)
@@ -59,9 +126,9 @@ function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps)
     for it ∈ 1:nsteps
         for istage ∈ 1:n_rk_stages
             yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage])
-            this_coeffs = rk_coeffs[:,istage]
-            #println("istage=$istage, this_coeffs=$this_coeffs")
-            yscratch[istage+1] = this_coeffs[1]*yscratch[1] + this_coeffs[2]*yscratch[istage] + this_coeffs[3]*yscratch[istage+1]
+            this_coefs = rk_coefs[:,istage]
+            #println("istage=$istage, this_coefs=$this_coefs")
+            yscratch[istage+1] = this_coefs[1]*yscratch[1] + this_coefs[2]*yscratch[istage] + this_coefs[3]*yscratch[istage+1]
             #println("istage=$istage, ", yscratch[istage+1])
         end
         #println("before yscratch=$yscratch")
@@ -73,7 +140,7 @@ function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps)
     return result
 end
 
-function rk_advance_butcher(a, b, y0, dt, nsteps)
+function rk_advance_butcher_explicit(a, b, y0, dt, nsteps)
     n_rk_stages = size(a, 2)
     kscratch = zeros(n_rk_stages)
     y = y0
@@ -105,6 +172,55 @@ function rk_advance_butcher(a, b, y0, dt, nsteps)
     return result, error
 end
 
+function rk_advance_butcher(a, b, y0, dt, nsteps, a_implicit=nothing, b_implicit=nothing)
+    # IMEX Runge-Kutta integration from Butcher tableaux: (a, b) weight the explicit stage
+    # increments dt*f, (a_implicit, b_implicit) the backward-Euler-type implicit increments.
+    # Returns (result, error); error stays zero unless b has a second (low-order) row.
+    n_rk_stages = size(a, 2)
+    if ndims(b) == 1  # promote to a row matrix first, so that zeros(size(b)) below is 2-D
+        b = b'
+    end
+    a_implicit === nothing && (a_implicit = zeros(n_rk_stages, n_rk_stages))
+    b_implicit === nothing && (b_implicit = zeros(size(b)))
+
+    kscratch = zeros(n_rk_stages)
+    kscratch_implicit = zeros(n_rk_stages)
+    y = y0
+    adaptive = size(b, 1) > 1
+
+    result = zeros(nsteps+1)
+    result[1] = y0
+
+    error = zeros(nsteps+1)  # per-step error estimate; entry 1 is never written
+
+    for it ∈ 1:nsteps
+        kscratch[1] = dt*f(y)  # NOTE(review): uses y, not the implicit stage value - same only if a_implicit[1,1] == 0
+        kscratch_implicit[1] = dt*f_implicit(y, a_implicit[1,1] * dt)
+        for i ∈ 2:n_rk_stages
+            ytilde = y +
+                     sum(a[i,j] * kscratch[j] for j ∈ 1:i-1) +
+                     sum(a_implicit[i,j] * kscratch_implicit[j] for j ∈ 1:i-1)
+            ystage = backward_euler(ytilde, dt * a_implicit[i,i])  # stage value after the implicit solve
+            kscratch_implicit[i] = dt*f_implicit(ytilde, dt * a_implicit[i,i])
+            kscratch[i] = dt*f(ystage)
+        end
+        if adaptive  # second rows of b and b_implicit give the low-order solution
+            y_loworder = y +
+                         sum(b[2,j]*kscratch[j] for j ∈ 1:n_rk_stages) +
+                         sum(b_implicit[2,j]*kscratch_implicit[j] for j ∈ 1:n_rk_stages)
+        end
+        y = y +
+            sum(b[1,j]*kscratch[j] for j ∈ 1:n_rk_stages) +
+            sum(b_implicit[1,j]*kscratch_implicit[j] for j ∈ 1:n_rk_stages)
+        if adaptive
+            error[it+1] = y_loworder - y
+        end
+        result[it+1] = y
+    end
+
+    return result, error
+end
+
 function rk4_by_hand(y0, dt, nsteps)
     result = zeros(nsteps+1)
     y = y0
@@ -122,31 +238,31 @@ function rk4_by_hand(y0, dt, nsteps)
 end
 
 methods = Dict(
-    "SSPRK3" => (rk_coeffs=Float64[0 3//4 1//3; 1 0 0; 0 1//4 0; 0 0 2//3],
+    "SSPRK3" => (rk_coefs=Float64[0 3//4 1//3; 1 0 0; 0 1//4 0; 0 0 2//3],
                  a=Float64[0 0 0; 1 0 0; 1//4 1//4 0],
                  b=Float64[1//6 1//6 2//3]),
 
-    "RK4" => (rk_coeffs = Float64[1//2 1 1 -1//3; 1//2 -1//2 0 1//3; 0 1//2 -1 2//3; 0 0 1 1//6; 0 0 0 1//6],
+    "RK4" => (rk_coefs = Float64[1//2 1 1 -1//3; 1//2 -1//2 0 1//3; 0 1//2 -1 2//3; 0 0 1 1//6; 0 0 0 1//6],
               a = Float64[0 0 0 0; 1//2 0 0 0; 0 1//2 0 0; 0 0 1 0],
               b = Float64[1//6 1//3 1//3 1//6]),
 
-    "RKF45" => (rk_coeffs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 -1],
+    "RKF45" => (rk_coefs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 0],
                 a = Float64[0 0 0 0 0 0; 1//4 0 0 0 0 0; 3//32 9//32 0 0 0 0; 1932//2197 -7200//2197 7296//2197 0 0 0; 439//216 -8 3680//513 -845//4104 0 0; -8//27 2 -3544//2565 1859//4104 -11//40 0],
                 b = Float64[16//135 0 6656//12825 28561//56430 -9//50 2//55; 25//216 0 1408//2565 2197//4104 -1//5 0]),
 
-    "RKF45 truncated" => (rk_coeffs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980; 1//4 3//32 17328//2197 95//54 33//10 232//165; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836; 0 0 0 -845//4104 -77//40 -56//55; 0 0 0 0 -11//40 34//55; 0 0 0 0 0 2//55],
+    "RKF45 truncated" => (rk_coefs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980; 1//4 3//32 17328//2197 95//54 33//10 232//165; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836; 0 0 0 -845//4104 -77//40 -56//55; 0 0 0 0 -11//40 34//55; 0 0 0 0 0 2//55],
                 a = Float64[0 0 0 0 0 0; 1//4 0 0 0 0 0; 3//32 9//32 0 0 0 0; 1932//2197 -7200//2197 7296//2197 0 0 0; 439//216 -8 3680//513 -845//4104 0 0; -8//27 2 -3544//2565 1859//4104 -11//40 0],
                 b = Float64[16//135 0 6656//12825 28561//56430 -9//50 2//55]),
 
-    "Heun SSPRK2" => (rk_coeffs = Float64[0 1//2; 1 0; 0 1//2],
+    "Heun SSPRK2" => (rk_coefs = Float64[0 1//2; 1 0; 0 1//2],
                       a = Float64[0 0; 1 0],
                       b = Float64[1//2 1//2]),
 
-    "Gottlieb 43" => (rk_coeffs = Float64[0 1//2 2//3; 1 0 0; 0 1//2 -1//3; 0 0 2//3],
+    "Gottlieb 43" => (rk_coefs = Float64[0 1//2 2//3; 1 0 0; 0 1//2 -1//3; 0 0 2//3],
                       a = Float64[0 0 0; 1 0 0; 1//2 1//2 0],
                       b = Float64[1//6 1//6 2//3]),
 
-    "mk ssprk3" => (rk_coeffs = Float64[1//2 0    2//3 0   ;
+    "mk ssprk3" => (rk_coefs = Float64[1//2 0    2//3 0   ;
                                         1//2 1//2 0    0   ;
                                         0    1//2 1//6 0   ;
                                         0    0    1//6 1//2;
@@ -154,50 +270,91 @@ methods = Dict(
                     a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0],
                     b = Float64[1//6 1//6 1//6 1//2]),
 
-    "mk ssprk2" => (rk_coeffs = Float64[0.0 0.5 0.0;
-                                        1.0 0.0 0.0;
-                                        0.0 0.5 0.0],
+    "mk ssprk2" => (rk_coefs = Float64[0.0 0.5;
+                                        1.0 0.0;
+                                        0.0 0.5],
                     a = Float64[0.0 0.0; 1.0 0.0],
                     b = Float64[0.5 0.5; 0.5 0.5]),
 
-    "Fekete 43" => (rk_coeffs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 -1//2],
+    "Fekete 43" => (rk_coefs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 1//2],
                     a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0],
                     b = Float64[1//6 1//6 1//6 1//2; 1//4 1//4 1//4 1//4]),
 
-    "Fekete 43 truncated" => (rk_coeffs = Float64[1//2 0 2//3 0; 1//2 1//2 0 0; 0 1//2 1//6 0; 0 0 1//6 1//2; 0 0 0 1//2],
+    "Fekete 43 truncated" => (rk_coefs = Float64[1//2 0 2//3 0; 1//2 1//2 0 0; 0 1//2 1//6 0; 0 0 1//6 1//2; 0 0 0 1//2],
                     a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0],
                     b = Float64[1//6 1//6 1//6 1//2]),
 
-    "Fekete 42" => (rk_coeffs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 -1//4],
+    "Fekete 42" => (rk_coefs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 3//4],
                     a = Float64[0 0 0 0; 1//3 0 0 0; 1//3 1//3 0 0; 1//3 1//3 1//3 0],
                     b = Float64[1//4 1//4 1//4 1//4; 5//16 1//4 1//4 3//16]),
 
-    "Fekete 10,4" => (rk_coeffs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 -1],
+    "Fekete 10,4" => (rk_coefs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 0],
                       a = Float64[0 0 0 0 0 0 0 0 0 0; 1//6 0 0 0 0 0 0 0 0 0; 1//6 1//6 0 0 0 0 0 0 0 0; 1//6 1//6 1//6 0 0 0 0 0 0 0; 1//6 1//6 1//6 1//6 0 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 1//6 0],
                       b = Float64[1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10; 1//5 0 0 3//10 0 0 1//5 0 3//10 0]),
 
-    "Fekete 6,4" => (rk_coeffs = [0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; 0.3552975516919 0.4295138541066736 -6.461498003318411e-14 -1.1871059690804486e-13 0.0 2.9254376698872875e-14 -0.18902907903375094; 0.0 0.33178669836 0.25530138316744333 -3.352873534367973e-14 0.0 0.2059808002676668 0.2504712436879622; 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522; 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.986456106503283e-14 1.1993626679930305; 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745; 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524],
+    "Fekete 6,4" => (rk_coefs = [0.6447024483081 0.23869944753332645 0.5474858792272213 0.3762853856474131 -6.304828384656085e-17 -0.1813232670344333 -0.0017300417984673633; 0.3552975516919 0.4295138541066736 -6.460461358323626e-14 -1.1868936325049587e-13 3.608184516786869e-18 2.9392365006883485e-14 -0.18902907903375094; -0.0 0.33178669836 0.25530138316744333 -3.3545605887402925e-14 -1.0929532856876731e-17 0.20598080026766677 0.2504712436879622; -0.0 -0.0 0.1972127376054 0.3518900216285391 7.036963218665071e-17 0.47926701162417157 -0.939747918037452; -0.0 -0.0 -0.0 0.2718245927242 0.5641843457422999 9.97599117309567e-14 1.1993626679930303; -0.0 -0.0 -0.0 -0.0 0.4358156542577 0.34165678726956566 -0.5310335716309745; -0.0 -0.0 -0.0 -0.0 -0.0 0.1544186678729 1.2117066988196523],
                      a = [0.0 0.0 0.0 0.0 0.0 0.0; 0.3552975516919 0.0 0.0 0.0 0.0 0.0; 0.2704882223931 0.33178669836 0.0 0.0 0.0 0.0; 0.1223997401356 0.1501381660925 0.1972127376054 0.0 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.4358156542577 0.0],
                      b = [0.1522491819555 0.1867521364225 0.1555370561501 0.1348455085546 0.2161974490441 0.1544186678729; 0.1210663237182 0.230884400455 0.0853424972752 0.3450614904457 0.0305351538213 0.1871101342844]),
+
+    "KennedyCarpenterARK437" => (rk_coefs = Float64[1259//2000 5290646302898597//8373961392408000 8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 -19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 
288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000; 247//1000 -989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 
295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 -103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 
4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000; 0 2694949928731//7487940209513 8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 
1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000; 0 0 -952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 
-3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 -2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000; 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 
-1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; 0 0 0 0 0 11565764226357//8513123442827 -25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; 0 0 0 0 0 0 247//2000 2441//2470],
+                                rk_coefs_implicit = Float64[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 -295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 
-1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 -4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000; 0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 
-205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 -6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 
-5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820; 0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 -773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 
-1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164; 0 0 0 247//2000 538260754703221409274894839//225772174112649072819826640 53717436927136847537872396533404737469775216052354000//7927277355078818621920036006972820247636839819935349 -1772830339659539491048538392985299701647037422231103660763419873677962619701225032539957600128007666810800//15721979495129036484463918711394582943772656542660620350548585102102545643714934169988943899054128147359 23300798642481300915550244810330608079712179111290003575082920574543585477524931007510835130377963055801601626441651388811757948939347006283700//721721356793285693541681937242605189550706405472415761047580279465244653002939927725991664966944137182650520997040379092802938837804294847283 7898475573277855254317959379657908427305225639249224806096111567367259892230833277244398214475361481979103570897581069354516256084225361210253104267762350814757343118967653292590//10295136532244819865134216418581425896312845076778105967882093451812315373150919833368240666957561175670654409595846292949450822030580980616104277250108095432715677497120580742213; 0 0 0 0 247//2000 109149106916529224225613938235//31652606811075124885796735456 -1471402139169815526549951787477624798736224552941321200//56440086799582050350922834838054057420026494337598863 5412580004078613161687981458637008030233366457113868199883389551037522158772900//455213269145024092215621169612878312797315768577504391565569423807316058315843 
3820395190970937465086284368007694267392689261092114936550556797062450867925288189416331303649552143230//14273858093118406255740963176533941671573423886510381570104226394395061792966349825149357548274393367193; 0 0 0 0 0 247//2000 3702251939282354375344210899//4399061605898178118260737311 7335745460336671146051037364344432719375//4167718263865310401834285721625989602073 11934751738672605596266458500552561610251142174626625//266467933294598164559149808395021381193202692438810102; 0 0 0 0 0 0 247//2000 1753//2000 0],
+                                implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0],
+                                a = Float64[0 0 0 0 0 0 0; 247//1000 0 0 0 0 0 0; 247//4000 2694949928731//7487940209513 0 0 0 0 0; 464650059369//8764239774964 878889893998//2444806327765 -952945855348//12294611323341 0 0 0 0; 476636172619//8159180917465 -1271469283451//7793814740893 -859560642026//4356155882851 1723805262919//4571918432560 0 0 0; 6338158500785//11769362343261 -4970555480458//10924838743837 3326578051521//2647936831840 -880713585975//1841400956686 -1428733748635//8843423958496 0 0; 760814592956//3276306540349 760814592956//3276306540349 -47223648122716//6934462133451 71187472546993//9669769126921 -13330509492149//9695768672337 11565764226357//8513123442827 0],
+                                b = Float64[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000; 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000],
+                                a_implicit = Float64[0 0 0 0 0 0 0; 247//2000 247//2000 0 0 0 0 0; 624185399699//4186980696204 624185399699//4186980696204 247//2000 0 0 0 0; 1258591069120//10082082980243 1258591069120//10082082980243 -322722984531//8455138723562 247//2000 0 0 0; -436103496990//5971407786587 -436103496990//5971407786587 -2689175662187//11046760208243 4431412449334//12995360898505 247//2000 0 0; -2207373168298//14430576638973 -2207373168298//14430576638973 242511121179//3358618340039 3145666661981//7780404714551 5882073923981//14490790706663 247//2000 0; 0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000],
+                                b_implicit = Float64[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000; 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000],
+                               ),
+
+# The 5th-order KennedyCarpenterARK548 method seems to be missing the 8th row of a_implicit
+# coefficients in the Kennedy & Carpenter (2019) paper, so the entry below is not correct.
+#    "KennedyCarpenterARK548" => (rk_coefs=Rational{BigInt}[],
+#                                rk_coefs_implicit = Float64[],
+#                                implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0, 0],
+#                                a = Float64[0 0 0 0 0 0 0 0; 4//9 0 0 0 0 0 0 0; 1//9 1183333538310//1827251437969 0 0 0 0 0 0; 895379019517//9750411845327 477606656805//13473228687314 -112564739183//9373365219272 0 0 0 0 0; -4458043123994//13015289567637 -2500665203865//9342069639922 983347055801//8893519644487 2185051477207//2551468980502 0 0 0 0; -167316361917//17121522574472 1605541814917//7619724128744 991021770328//13052792161721 2342280609577//11279663441611 3012424348531//12792462456678 0 0 0; 6680998715867//14310383562358 5029118570809//3897454228471 2415062538259//6382199904604 -3924368632305//6964820224454 -4331110370267//15021686902756 -3944303808049//11994238218192 0 0; 2193717860234//3570523412979 2193717860234//3570523412979 5952760925747//18750164281544 -4412967128996//6196664114337 4151782504231//36106512998704 572599549169//6265429158920 -457874356192//11306498036315 0],
+#                                b = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926],
+#                                a_implicit = Float64[0 0 0 0 0 0 0 0; 2//9 2//9 0 0 0 0 0 0; 2366667076620//8822750406821 2366667076620//8822750406821 2//9 0 0 0 0 0; -257962897183//4451812247028 -257962897183//4451812247028 128530224461//14379561246022 2//9 0 0 0 0; -486229321650//11227943450093 -486229321650//11227943450093 -225633144460//6633558740617 1741320951451//6824444397158 2//9 0 0 0; 621307788657//4714163060173 621307788657//4714163060173 -125196015625//3866852212004 940440206406//7593089888465 961109811699//6734810228204 2//9 0 0; 2036305566805//6583108094622 2036305566805//6583108094622 -3039402635899//4450598839912 -1829510709469//31102090912115 -286320471013//6931253422520 8651533662697//9642993110008 2//9 0; 0 0 0 0 0 0 0 2//9],
+#                                b_implicit = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926],
+#                               ),
+
+    "KennedyCarpenterARK324" => (rk_coefs = Float64[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 -674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800; 1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; 0 788022342437//10882634858940 
-1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 -99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; 0 0 10755448449292//10357097424841 -2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; 0 0 0 1767732205903//4055673282236 2223734833661311464443869//2412892370833855116699825],
+                                 rk_coefs_implicit = Float64[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 -1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200; 0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 -13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425; 0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225; 0 
0 0 1767732205903//4055673282236 2287941076333//4055673282236 0],
+                                 implicit_coefficient_is_zero = Bool[1, 0, 0, 0],
+                                 a = Float64[0 0 0 0; 1767732205903//2027836641118 0 0 0; 5535828885825//10492691773637 788022342437//10882634858940 0 0; 6485989280629//16251701735622 -4246266847089//9704473918619 10755448449292//10357097424841 0],
+                                 b = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100],
+                                 a_implicit = Float64[0 0 0 0; 1767732205903//4055673282236 1767732205903//4055673282236 0 0; 2746238789719//10658868560708 -640167445237//6845629431997 1767732205903//4055673282236 0; 1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236],
+                                 b_implicit = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100]
+                                ),
    )
 
-a, b = convert_rk_coeffs_to_butcher_tableau(methods["RKF45"].rk_coeffs)
-methods["RKF45 attempt 2"] = (rk_coeffs = methods["RKF45"].rk_coeffs,
+a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true)
+methods["RKF45 attempt 2"] = (rk_coefs = methods["RKF45"].rk_coefs,
                              a = a, b = b)
 
 for (k,v) ∈ methods
+    imex = haskey(v, :rk_coefs_implicit)  # entry provides implicit coefficients
+
     println("\n", k)
-    result, error = rk_advance(v.rk_coeffs, y0, dt, nsteps)
-    result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps)
+
+    if imex  # also pass the implicit coefficients to both integrators
+        this_result, this_error = rk_advance(v.rk_coefs, y0, dt, nsteps, v.rk_coefs_implicit, v.implicit_coefficient_is_zero)
+        result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps, v.a_implicit, v.b_implicit)
+    else
+        this_result, this_error = rk_advance(v.rk_coefs, y0, dt, nsteps)
+        result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps)
+    end
 
     #for i ∈ 1:multiplier:nsteps+1
-    #    println("$i t=", t[i], " analytic=", analytic[i], " result=", result[i], " result_butcher=", result_butcher[i])
+    #    println("$i t=", t[i], " analytic=", analytic[i], " result=", this_result[i], " result_butcher=", result_butcher[i])
     #end
     println("t=", t[end])
-    println("analytic       = ", analytic[end])
-    println("result         = ", result[end])
+    if imex  # entries with implicit coefficients are compared with analytic_implicit
+        println("analytic       = ", analytic_implicit[end])
+    else
+        println("analytic       = ", analytic[end])
+    end
+    println("result         = ", this_result[end])
     println("result_butcher = ", result_butcher[end])
-    println("error         = ", error[end])
+    println("error         = ", this_error[end])
     println("error_butcher = ", error_butcher[end])
 end
 
@@ -230,7 +387,7 @@ elseif n_rk_stages == 1
 else
     error("Unsupported number of RK stages, n_rk_stages=$n_rk_stages")
 end
-result = rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps)
+ssprk3_result = rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps)
 println("t=", t[end])
 println("analytic       = ", analytic[end])
-println("result         = ", result[end])
+println("result         = ", ssprk3_result[end])