diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml
index 8db3f061..93e211f5 100644
--- a/.github/workflows/Tests.yml
+++ b/.github/workflows/Tests.yml
@@ -27,14 +27,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os:
-          - "ubuntu-latest"
-          - "macos-latest"
-          - "windows-latest"
         group:
-          - "CPU"
+          - "MIRK"
+          - "MISC"
+          - "SHOOTING"
+          - "FIRK(EXPANDED)"
+          - "FIRK(NESTED)"
+          - "WRAPPERS"
     uses: "SciML/.github/.github/workflows/tests.yml@v1"
     with:
-      os: "${{ matrix.os }}"
       group: "${{ matrix.group }}"
     secrets: "inherit"
diff --git a/Project.toml b/Project.toml
index e6ad83d8..64a483fc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -36,7 +36,7 @@ BoundaryValueDiffEqODEInterfaceExt = "ODEInterface"
 [compat]
 ADTypes = "1.2"
 Adapt = "4"
-Aqua = "0.8"
+Aqua = "0.8.7"
 ArrayInterface = "7.7"
 BandedMatrices = "1.4"
 ConcreteStructs = "0.2.3"
@@ -51,7 +51,7 @@ LinearSolve = "2.21"
 Logging = "1.10"
 NonlinearSolve = "3.8.1"
 ODEInterface = "0.5"
-OrdinaryDiffEq = "6.63"
+OrdinaryDiffEq = "6.88.1"
 PreallocationTools = "0.4.24"
 PrecompileTools = "1.2"
 Preferences = "1.4"
diff --git a/src/BoundaryValueDiffEq.jl b/src/BoundaryValueDiffEq.jl
index 1b7a3d26..cb6055b1 100644
--- a/src/BoundaryValueDiffEq.jl
+++ b/src/BoundaryValueDiffEq.jl
@@ -2,9 +2,9 @@ module BoundaryValueDiffEq
 
 import PrecompileTools: @compile_workload, @setup_workload
 
-using ADTypes, Adapt, ArrayInterface, DiffEqBase, ForwardDiff, LinearAlgebra, NonlinearSolve,
-      OrdinaryDiffEq, Preferences, RecursiveArrayTools, Reexport, SciMLBase, Setfield,
-      SparseDiffTools
+using ADTypes, Adapt, ArrayInterface, DiffEqBase, ForwardDiff, LinearAlgebra,
+      NonlinearSolve, OrdinaryDiffEq, Preferences, RecursiveArrayTools, Reexport, SciMLBase,
+      Setfield, SparseDiffTools
 
 using PreallocationTools: PreallocationTools, DiffCache
 
@@ -30,9 +30,12 @@ include("algorithms.jl")
 include("alg_utils.jl")
 
 include("mirk_tableaus.jl")
+include("lobatto_tableaus.jl")
+include("radau_tableaus.jl")
 
 include("solve/single_shooting.jl")
 include("solve/multiple_shooting.jl")
+include("solve/firk.jl")
 include("solve/mirk.jl")
 
 include("collocation.jl")
@@ -108,8 +111,7 @@ end
         resid[3] = solₜ₂[2] + 1.729109
         return nothing
     end
-    bc1_nlls = (sol, p, t) -> [
-        sol[:, 1][1], sol[:, end][1] - 1, sol[:, end][2] + 1.729109]
+    bc1_nlls = (sol, p, t) -> [sol[:, 1][1], sol[:, end][1] - 1, sol[:, end][2] + 1.729109]
 
     bc1_nlls_a! = (resid, ua, p) -> (resid[1] = ua[1])
     bc1_nlls_b! = (resid, ub, p) -> (resid[1] = ub[1] - 1;
@@ -205,8 +207,7 @@ end
         resid[3] = solₜ₂[2] + 1.729109
         return nothing
     end
-    bc1_nlls = (sol, p, t) -> [
-        sol(0.0)[1], sol(100.0)[1] - 1, sol(1.0)[2] + 1.729109]
+    bc1_nlls = (sol, p, t) -> [sol(0.0)[1], sol(100.0)[1] - 1, sol(1.0)[2] + 1.729109]
 
     tspan = (0.0, 100.0)
     u0 = [0.0, 1.0]
@@ -273,6 +274,10 @@ export Shooting, MultipleShooting
 export MIRK2, MIRK3, MIRK4, MIRK5, MIRK6
 export BVPM2, BVPSOL, COLNEW # From ODEInterface.jl
 
+export RadauIIa1, RadauIIa2, RadauIIa3, RadauIIa5, RadauIIa7
+export LobattoIIIa2, LobattoIIIa3, LobattoIIIa4, LobattoIIIa5
+export LobattoIIIb2, LobattoIIIb3, LobattoIIIb4, LobattoIIIb5
+export LobattoIIIc2, LobattoIIIc3, LobattoIIIc4, LobattoIIIc5
 export MIRKJacobianComputationAlgorithm, BVPJacobianAlgorithm
 
 end
diff --git a/src/adaptivity.jl b/src/adaptivity.jl
index 26bf09bc..89a58a3f 100644
--- a/src/adaptivity.jl
+++ b/src/adaptivity.jl
@@ -12,6 +12,124 @@ After we construct an interpolant, we use interp_eval to evaluate it.
     return y
 end
 
+@views function interp_eval!(
+        y::AbstractArray, cache::FIRKCacheExpand{iip}, t, mesh, mesh_dt) where {iip}
+    i = findfirst(x -> x == y, cache.y₀.u)
+    interp_eval!(cache.y₀.u, i, cache::FIRKCacheExpand{iip}, t, mesh, mesh_dt)
+    return y
+end
+
+@views function interp_eval!(
+        y::AbstractArray, i::Int, cache::FIRKCacheExpand{iip}, t, mesh, mesh_dt) where {iip}
+    j = interval(mesh, t)
+    h = mesh_dt[j]
+    lf = (length(cache.y₀) - 1) / (length(cache.y) - 1) # Cache length factor. We use a h corresponding to cache.y. Note that this assumes equidistributed mesh
+    if lf > 1
+        h *= lf
+    end
+    τ = (t - mesh[j])
+
+    (; f, M, p, ITU) = cache
+    (; q_coeff, stage) = ITU
+
+    K = __similar(cache.y[1].du, M, stage)
+
+    ctr_y0 = (i - 1) * (stage + 1) + 1
+    ctr_y = (j - 1) * (stage + 1) + 1
+
+    yᵢ = cache.y[ctr_y].du
+    yᵢ₊₁ = cache.y[ctr_y + stage + 1].du
+
+    if iip
+        dyᵢ = similar(yᵢ)
+        dyᵢ₊₁ = similar(yᵢ₊₁)
+
+        f(dyᵢ, yᵢ, p, mesh[j])
+        f(dyᵢ₊₁, yᵢ₊₁, p, mesh[j + 1])
+    else
+        dyᵢ = f(yᵢ, p, mesh[j])
+        dyᵢ₊₁ = f(yᵢ₊₁, p, mesh[j + 1])
+    end
+
+    # Load interpolation residual
+    for jj in 1:stage
+        K[:, jj] = cache.y[ctr_y + jj].du
+    end
+
+    z₁, z₁′ = eval_q(yᵢ, 0.5, h, q_coeff, K) # Evaluate q(x) at midpoints
+    S_coeffs = get_S_coeffs(h, yᵢ, yᵢ₊₁, z₁, dyᵢ, dyᵢ₊₁, z₁′)
+
+    S_interpolate!(y, τ, S_coeffs)
+    return y
+end
+
+@views function interp_eval!(
+        y::AbstractArray, cache::FIRKCacheNested{iip, T}, t, mesh, mesh_dt) where {iip, T}
+    (; f, ITU, nest_prob, nest_tol, alg) = cache
+    (; q_coeff) = ITU
+
+    j = interval(mesh, t)
+    h = mesh_dt[j]
+    lf = (length(cache.y₀) - 1) / (length(cache.y) - 1) # Cache length factor. We use a h corresponding to cache.y. Note that this assumes equidistributed mesh
+    if lf > 1
+        h *= lf
+    end
+    τ = (t - mesh[j])
+
+    nest_nlsolve_alg = __concrete_nonlinearsolve_algorithm(nest_prob, alg.nlsolve)
+    nestprob_p = zeros(T, cache.M + 2)
+
+    yᵢ = copy(cache.y[j].du)
+    yᵢ₊₁ = copy(cache.y[j + 1].du)
+
+    if iip
+        dyᵢ = similar(yᵢ)
+        dyᵢ₊₁ = similar(yᵢ₊₁)
+
+        f(dyᵢ, yᵢ, cache.p, mesh[j])
+        f(dyᵢ₊₁, yᵢ₊₁, cache.p, mesh[j + 1])
+    else
+        dyᵢ = f(yᵢ, cache.p, mesh[j])
+        dyᵢ₊₁ = f(yᵢ₊₁, cache.p, mesh[j + 1])
+    end
+
+    nestprob_p[1] = mesh[j]
+    nestprob_p[2] = mesh_dt[j]
+    nestprob_p[3:end] .= yᵢ
+
+    _nestprob = remake(nest_prob, p = nestprob_p)
+    nestsol = __solve(_nestprob, nest_nlsolve_alg; abstol = nest_tol)
+    K = nestsol.u
+
+    z₁, z₁′ = eval_q(yᵢ, 0.5, h, q_coeff, K) # Evaluate q(x) at midpoints
+    S_coeffs = get_S_coeffs(h, yᵢ, yᵢ₊₁, z₁, dyᵢ, dyᵢ₊₁, z₁′)
+
+    S_interpolate!(y, τ, S_coeffs)
+    return y
+end
+
+function get_S_coeffs(h, yᵢ, yᵢ₊₁, dyᵢ, dyᵢ₊₁, ymid, dymid)
+    vals = vcat(yᵢ, yᵢ₊₁, dyᵢ, dyᵢ₊₁, ymid, dymid)
+    M = length(yᵢ)
+    A = s_constraints(M, h)
+    coeffs = reshape(A \ vals, 6, M)'
+    return coeffs
+end
+
+# S forward Interpolation
+function S_interpolate!(y::AbstractArray, t, coeffs)
+    ts = [t^(i - 1) for i in axes(coeffs, 2)]
+    y .= coeffs * ts
+end
+
+function dS_interpolate!(dy::AbstractArray, t, S_coeffs)
+    ts = zeros(size(S_coeffs, 2))
+    for i in 2:size(S_coeffs, 2)
+        ts[i] = (i - 1) * t^(i - 2)
+    end
+    dy .= S_coeffs * ts
+end
+
 """
     interval(mesh, t)
 
@@ -26,10 +144,11 @@ end
 
 Generate new mesh based on the defect.
 """
-@views function mesh_selector!(cache::MIRKCache{iip, T}) where {iip, T}
-    (; M, order, defect, mesh, mesh_dt) = cache
+@views function mesh_selector!(cache::Union{
+        MIRKCache{iip, T}, FIRKCacheExpand{iip, T}, FIRKCacheNested{iip, T}}) where {iip, T}
+    (; order, defect, mesh, mesh_dt) = cache
     (abstol, _, _), kwargs = __split_mirk_kwargs(; cache.kwargs...)
-    N = length(cache.mesh)
+    N = length(mesh)
 
     safety_factor = T(1.3)
     ρ = T(1.0) # Set rho=1 means mesh distribution will take place everytime.
@@ -84,7 +203,8 @@ end
 Generate a new mesh based on the `ŝ`.
 """
 function redistribute!(
-        cache::MIRKCache{iip, T}, Nsub_star, ŝ, mesh, mesh_dt) where {iip, T}
+        cache::Union{MIRKCache{iip, T}, FIRKCacheExpand{iip, T}, FIRKCacheNested{iip, T}},
+        Nsub_star, ŝ, mesh, mesh_dt) where {iip, T}
     N = length(mesh)
     ζ = sum(ŝ .* mesh_dt) / Nsub_star
     k, i = 1, 0
@@ -134,7 +254,9 @@ function half_mesh!(mesh::Vector{T}, mesh_dt::Vector{T}) where {T}
     end
     return mesh, mesh_dt
 end
-half_mesh!(cache::MIRKCache) = half_mesh!(cache.mesh, cache.mesh_dt)
+function half_mesh!(cache::Union{MIRKCache, FIRKCacheNested, FIRKCacheExpand})
+    half_mesh!(cache.mesh, cache.mesh_dt)
+end
 
 """
     defect_estimate!(cache::MIRKCache)
@@ -144,8 +266,8 @@ the RK method in 'k_discrete', plus some new stages in 'k_interp' to construct
 an interpolant
 """
 @views function defect_estimate!(cache::MIRKCache{iip, T}) where {iip, T}
-    (; M, stage, f, alg, mesh, mesh_dt, defect) = cache
-    (; s_star, τ_star) = cache.ITU
+    (; f, alg, mesh, mesh_dt, defect) = cache
+    (; τ_star) = cache.ITU
 
     # Evaluate at the first sample point
     w₁, w₁′ = interp_weights(τ_star, alg)
@@ -183,6 +305,129 @@ an interpolant
     return maximum(Base.Fix1(maximum, abs), defect.u)
 end
 
+@views function defect_estimate!(cache::FIRKCacheExpand{iip, T}) where {iip, T}
+    (; f, M, stage, mesh, mesh_dt, defect, ITU) = cache
+    (; q_coeff, τ_star) = ITU
+
+    ctr = 1
+    K = zeros(eltype(cache.y[1].du), M, stage)
+    for i in 1:(length(mesh) - 1)
+        h = mesh_dt[i]
+
+        # Load interpolation residual
+        for j in 1:stage
+            K[:, j] = cache.y[ctr + j].du
+        end
+
+        # Defect estimate from q(x) at y_i + τ* * h
+        yᵢ₁ = copy(cache.y[ctr].du)
+        yᵢ₂ = copy(yᵢ₁)
+        z₁, z₁′ = eval_q(yᵢ₁, τ_star, h, q_coeff, K)
+        if iip
+            f(yᵢ₁, z₁, cache.p, mesh[i] + τ_star * h)
+        else
+            yᵢ₁ = f(z₁, cache.p, mesh[i] + τ_star * h)
+        end
+        yᵢ₁ .= (z₁′ .- yᵢ₁) ./ (abs.(yᵢ₁) .+ T(1))
+        est₁ = maximum(abs, yᵢ₁)
+
+        z₂, z₂′ = eval_q(yᵢ₂, (T(1) - τ_star), h, q_coeff, K)
+        # Defect estimate from q(x) at y_i + (1-τ*) * h
+        if iip
+            f(yᵢ₂, z₂, cache.p, mesh[i] + (T(1) - τ_star) * h)
+        else
+            yᵢ₂ = f(z₂, cache.p, mesh[i] + (T(1) - τ_star) * h)
+        end
+        yᵢ₂ .= (z₂′ .- yᵢ₂) ./ (abs.(yᵢ₂) .+ T(1))
+        est₂ = maximum(abs, yᵢ₂)
+
+        defect.u[i] .= est₁ > est₂ ? yᵢ₁ : yᵢ₂
+        ctr += stage + 1 # Advance one step
+    end
+
+    return maximum(Base.Fix1(maximum, abs), defect)
+end
+
+@views function defect_estimate!(cache::FIRKCacheNested{iip, T}) where {iip, T}
+    (; f, mesh, mesh_dt, defect, ITU, nest_prob, nest_tol) = cache
+    (; q_coeff, τ_star) = ITU
+
+    nlsolve_alg = __concrete_nonlinearsolve_algorithm(nest_prob, cache.alg.nlsolve)
+    nestprob_p = zeros(T, cache.M + 2)
+
+    for i in 1:(length(mesh) - 1)
+        h = mesh_dt[i]
+        yᵢ₁ = copy(cache.y[i].du)
+        yᵢ₂ = copy(yᵢ₁)
+
+        K = copy(cache.k_discrete[i].du)
+
+        if minimum(abs.(K)) < 1e-2
+            K = fill(one(eltype(K)), size(K))
+        end
+
+        nestprob_p[1] = mesh[i]
+        nestprob_p[2] = mesh_dt[i]
+        nestprob_p[3:end] .= yᵢ₁
+
+        _nestprob = remake(nest_prob, p = nestprob_p)
+        nest_sol = __solve(_nestprob, nlsolve_alg; abstol = nest_tol)
+
+        # Defect estimate from q(x) at y_i + τ* * h
+        z₁, z₁′ = eval_q(yᵢ₁, τ_star, h, q_coeff, nest_sol.u)
+        if iip
+            f(yᵢ₁, z₁, cache.p, mesh[i] + τ_star * h)
+        else
+            yᵢ₁ = f(z₁, cache.p, mesh[i] + τ_star * h)
+        end
+        yᵢ₁ .= (z₁′ .- yᵢ₁) ./ (abs.(yᵢ₁) .+ T(1))
+        est₁ = maximum(abs, yᵢ₁)
+
+        # Defect estimate from q(x) at y_i + (1-τ*) * h
+        z₂, z₂′ = eval_q(yᵢ₂, (T(1) - τ_star), h, q_coeff, nest_sol.u)
+        if iip
+            f(yᵢ₂, z₂, cache.p, mesh[i] + (T(1) - τ_star) * h)
+        else
+            yᵢ₂ = f(z₂, cache.p, mesh[i] + (T(1) - τ_star) * h)
+        end
+        yᵢ₂ .= (z₂′ .- yᵢ₂) ./ (abs.(yᵢ₂) .+ T(1))
+        est₂ = maximum(abs, yᵢ₂)
+
+        defect.u[i] .= est₁ > est₂ ? yᵢ₁ : yᵢ₂
+    end
+
+    return maximum(Base.Fix1(maximum, abs), defect)
+end
+
+function get_q_coeffs(A, ki, h)
+    coeffs = A * ki
+    for i in axes(coeffs, 1)
+        coeffs[i] = coeffs[i] / (h^(i - 1))
+    end
+    return coeffs
+end
+
+function apply_q(y_i, τ, h, coeffs)
+    return y_i + sum(coeffs[i] * (τ * h)^(i) for i in axes(coeffs, 1))
+end
+
+function apply_q_prime(τ, h, coeffs)
+    return sum(i * coeffs[i] * (τ * h)^(i - 1) for i in axes(coeffs, 1))
+end
+
+function eval_q(y_i, τ, h, A, K)
+    M = size(K, 1)
+    q = zeros(M)
+    q′ = zeros(M)
+    for i in 1:M
+        ki = @view K[i, :]
+        coeffs = get_q_coeffs(A, ki, h)
+        q[i] = apply_q(y_i[i], τ, h, coeffs)
+        q′[i] = apply_q_prime(τ, h, coeffs)
+    end
+    return q, q′
+end
+
 """
     interp_setup!(cache::MIRKCache)
 
@@ -207,12 +452,13 @@ Here, the ki_interp is the stages in one subinterval.
         end
         for i in eachindex(new_stages)
             new_stages.u[i] .= new_stages.u[i] .* mesh_dt[i] .+
-                             (1 - v_star[r]) .* vec(y[i].du) .+
-                             v_star[r] .* vec(y[i + 1].du)
+                               (1 - v_star[r]) .* vec(y[i].du) .+
+                               v_star[r] .* vec(y[i + 1].du)
             if iip
                 f(k_interp.u[i][:, r], new_stages.u[i], p, mesh[i] + c_star[r] * mesh_dt[i])
             else
-                k_interp.u[i][:, r] .= f(new_stages.u[i], p, mesh[i] + c_star[r] * mesh_dt[i])
+                k_interp.u[i][:, r] .= f(
+                    new_stages.u[i], p, mesh[i] + c_star[r] * mesh_dt[i])
             end
         end
     end
@@ -230,7 +476,7 @@ function sum_stages!(cache::MIRKCache, w, w′, i::Int, dt = cache.mesh_dt[i])
 end
 
 function sum_stages!(z::AbstractArray, cache::MIRKCache, w, i::Int, dt = cache.mesh_dt[i])
-    (; M, stage, mesh, k_discrete, k_interp, mesh_dt) = cache
+    (; stage, k_discrete, k_interp) = cache
     (; s_star) = cache.ITU
 
     z .= zero(z)
@@ -243,7 +489,7 @@ function sum_stages!(z::AbstractArray, cache::MIRKCache, w, i::Int, dt = cache.m
 end
 
 @views function sum_stages!(z, z′, cache::MIRKCache, w, w′, i::Int, dt = cache.mesh_dt[i])
-    (; M, stage, mesh, k_discrete, k_interp, mesh_dt) = cache
+    (; stage, k_discrete, k_interp) = cache
     (; s_star) = cache.ITU
 
     z .= zero(z)
@@ -376,7 +622,7 @@ for order in (2, 3, 4, 5, 6)
 end
 
 function sol_eval(cache::MIRKCache{T}, t::T) where {T}
-    (; M, mesh, mesh_dt, alg, k_discrete, k_interp, y) = cache
+    (; M, mesh, mesh_dt, alg) = cache
 
     @assert mesh[1] ≤ t ≤ mesh[end]
     i = interval(mesh, t)
diff --git a/src/alg_utils.jl b/src/alg_utils.jl
index 4791ef97..c6b4dc79 100644
--- a/src/alg_utils.jl
+++ b/src/alg_utils.jl
@@ -4,8 +4,33 @@ for order in (2, 3, 4, 5, 6)
     @eval alg_stage(::$(alg)) = $(order - 1)
 end
 
+for stage in (1, 2, 3, 5, 7)
+    alg = Symbol("RadauIIa$(stage)")
+    @eval alg_order(::$(alg)) = $(2 * stage - 1)
+    @eval alg_stage(::$(alg)) = $stage
+end
+
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIa$(stage)")
+    @eval alg_order(::$(alg)) = $(2 * stage - 2)
+    @eval alg_stage(::$(alg)) = $stage
+end
+
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIb$(stage)")
+    @eval alg_order(::$(alg)) = $(2 * stage - 2)
+    @eval alg_stage(::$(alg)) = $stage
+end
+
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIc$(stage)")
+    @eval alg_order(::$(alg)) = $(2 * stage - 2)
+    @eval alg_stage(::$(alg)) = $stage
+end
+
 SciMLBase.isautodifferentiable(::BoundaryValueDiffEqAlgorithm) = true
 SciMLBase.allows_arbitrary_number_types(::BoundaryValueDiffEqAlgorithm) = true
 SciMLBase.allowscomplex(alg::BoundaryValueDiffEqAlgorithm) = true
 
 SciMLBase.isadaptive(alg::AbstractMIRK) = true
+SciMLBase.isadaptive(alg::AbstractFIRK) = true
diff --git a/src/algorithms.jl b/src/algorithms.jl
index 338208ea..d1a6cc13 100644
--- a/src/algorithms.jl
+++ b/src/algorithms.jl
@@ -2,6 +2,7 @@
 abstract type BoundaryValueDiffEqAlgorithm <: SciMLBase.AbstractBVPAlgorithm end
 abstract type AbstractShooting <: BoundaryValueDiffEqAlgorithm end
 abstract type AbstractMIRK <: BoundaryValueDiffEqAlgorithm end
+abstract type AbstractFIRK <: BoundaryValueDiffEqAlgorithm end
 
 ## Disable the ugly verbose printing by default
 @inline __modifier_text!(list, fieldname, field) = push!(list, "$fieldname = $(field)")
@@ -195,6 +196,376 @@ for order in (2, 3, 4, 5, 6)
     end
 end
 
+for stage in (1, 2, 3, 5, 7)
+    alg = Symbol("RadauIIa$(stage)")
+
+    @eval begin
+        """
+            $($alg)(; nlsolve = NewtonRaphson(), jac_alg = BVPJacobianAlgorithm(), nested_nlsolve = false, nest_tol = 0.0,
+                    defect_threshold = 0.1, max_num_subintervals = 3000)
+
+        $($stage)th stage RadauIIa method.
+
+        ## Keyword Arguments
+
+        - `nlsolve`: Internal Nonlinear solver. Any solver which conforms to the SciML
+          `NonlinearProblem` interface can be used. Note that any autodiff argument for
+          the solver will be ignored and a custom jacobian algorithm will be used.
+        - `jac_alg`: Jacobian Algorithm used for the nonlinear solver. Defaults to
+          `BVPJacobianAlgorithm()`, which automatically decides the best algorithm to
+          use based on the input types and problem type.
+          - For `TwoPointBVProblem`, only `diffmode` is used (defaults to
+            `AutoSparse(AutoForwardDiff)` if possible else `AutoSparse(AutoFiniteDiff)`).
+          - For `BVProblem`, `bc_diffmode` and `nonbc_diffmode` are used. For
+            `nonbc_diffmode` defaults to `AutoSparse(AutoForwardDiff)` if possible else
+            `AutoSparse(AutoFiniteDiff)`. For `bc_diffmode`, defaults to `AutoForwardDiff` if
+            possible else `AutoFiniteDiff`.
+        - `nested_nlsolve`: Whether or not to use a nested nonlinear solve for the 
+        implicit FIRK step. Defaults to `false`. If set to `false`, the FIRK stages are 
+        solved as a part of the global residual. The general recommendation is to choose 
+        `true` for larger problems and `false` for smaller ones.
+        - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to 
+        `NonlinearSolve` automatically selecting the tolerance.
+        - `defect_threshold`: Threshold for defect control.
+        - `max_num_subintervals`: Number of maximal subintervals, default as 3000.
+
+      !!! note
+          For type-stability, the chunksizes for ForwardDiff ADTypes in
+          `BVPJacobianAlgorithm` must be provided.
+
+      ## References
+            Reference for Lobatto and Radau methods:
+
+                @incollection{Jay2015,
+                author="Jay, Laurent O.",
+                editor="Engquist, Bj{\"o}rn",
+                title="Lobatto Methods",
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                year="2015",
+                publisher="Springer Berlin Heidelberg",
+                }
+                @incollection{engquist_radau_2015,
+                author = {Hairer, Ernst and Wanner, Gerhard},
+                editor={Engquist, Bj{\"o}rn},
+                title = {Radau {Methods}},
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                publisher = {Springer Berlin Heidelberg},
+                year = {2015},
+            }
+      References for implementation of defect control, based on the `bvp5c` solver in MATLAB:
+
+        @article{shampine_solving_nodate,
+        title = {Solving {Boundary} {Value} {Problems} for {Ordinary} {Diﬀerential} {Equations} in {Matlab} with bvp4c
+        author = {Shampine, Lawrence F and Kierzenka, Jacek and Reichelt, Mark W},
+        year = {2000},
+    }
+
+    @article{kierzenka_bvp_2008,
+        title = {A {BVP} {Solver} that {Controls} {Residual} and {Error}},
+        author = {Kierzenka, J and Shampine, L F},
+        year = {2008},
+    }
+
+    @article{russell_adaptive_1978,
+        title = {Adaptive {Mesh} {Selection} {Strategies} for {Solving} {Boundary} {Value} {Problems}},
+        journal = {SIAM Journal on Numerical Analysis},
+        author = {Russell, R. D. and Christiansen, J.},
+        year = {1978},
+    }
+        """
+        Base.@kwdef struct $(alg){N, J <: BVPJacobianAlgorithm, T} <: AbstractFIRK
+            nlsolve::N = nothing
+            jac_alg::J = BVPJacobianAlgorithm()
+            nested_nlsolve::Bool = false
+            nest_tol::Number = 0.0
+            defect_threshold::T = 0.1
+            max_num_subintervals::Int = 3000
+        end
+        $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){
+            N, J, T}(
+            nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals)
+    end
+end
+
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIa$(stage)")
+
+    @eval begin
+        """
+            $($alg)(; nlsolve = NewtonRaphson(), jac_alg = BVPJacobianAlgorithm(), nested_nlsolve = false, nest_tol = 0.0,
+                    defect_threshold = 0.1, max_num_subintervals = 3000)
+
+        $($stage)th stage LobattoIIIa method.
+
+        ## Keyword Arguments
+
+        - `nlsolve`: Internal Nonlinear solver. Any solver which conforms to the SciML
+          `NonlinearProblem` interface can be used. Note that any autodiff argument for
+          the solver will be ignored and a custom jacobian algorithm will be used.
+        - `jac_alg`: Jacobian Algorithm used for the nonlinear solver. Defaults to
+          `BVPJacobianAlgorithm()`, which automatically decides the best algorithm to
+          use based on the input types and problem type.
+          - For `TwoPointBVProblem`, only `diffmode` is used (defaults to
+            `AutoSparse(AutoForwardDiff)` if possible else `AutoSparse(AutoFiniteDiff)`).
+          - For `BVProblem`, `bc_diffmode` and `nonbc_diffmode` are used. For
+            `nonbc_diffmode` defaults to `AutoSparse(AutoForwardDiff)` if possible else
+            `AutoSparse(AutoFiniteDiff)`. For `bc_diffmode`, defaults to `AutoForwardDiff` if
+            possible else `AutoFiniteDiff`.
+        - `nested_nlsolve`: Whether or not to use a nested nonlinear solve for the 
+        implicit FIRK step. Defaults to `false`. If set to `false`, the FIRK stages are 
+        solved as a part of the global residual. The general recommendation is to choose 
+        `true` for larger problems and `false` for smaller ones.
+        - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to 
+        `NonlinearSolve` automatically selecting the tolerance.
+        - `defect_threshold`: Threshold for defect control.
+        - `max_num_subintervals`: Number of maximal subintervals, default as 3000.
+
+      !!! note
+          For type-stability, the chunksizes for ForwardDiff ADTypes in
+          `BVPJacobianAlgorithm` must be provided.
+
+      ## References
+            Reference for Lobatto and Radau methods:
+
+                @Inbook{Jay2015,
+                author="Jay, Laurent O.",
+                editor="Engquist, Bj{\"o}rn",
+                title="Lobatto Methods",
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                year="2015",
+                publisher="Springer Berlin Heidelberg",
+                }
+                @incollection{engquist_radau_2015,
+                author = {Hairer, Ernst and Wanner, Gerhard},
+                title = {Radau {Methods}},
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                publisher = {Springer Berlin Heidelberg},
+                editor="Engquist, Bj{\"o}rn",
+                year = {2015},
+            }
+      References for implementation of defect control, based on the `bvp5c` solver in MATLAB:
+
+        @article{shampine_solving_nodate,
+        title = {Solving {Boundary} {Value} {Problems} for {Ordinary} {Diﬀerential} {Equations} in {Matlab} with bvp4c
+        author = {Shampine, Lawrence F and Kierzenka, Jacek and Reichelt, Mark W},
+        year = {2000},
+    }
+
+    @article{kierzenka_bvp_2008,
+        title = {A {BVP} {Solver} that {Controls} {Residual} and {Error}},
+        author = {Kierzenka, J and Shampine, L F},
+        year = {2008},
+    }
+
+    @article{russell_adaptive_1978,
+        title = {Adaptive {Mesh} {Selection} {Strategies} for {Solving} {Boundary} {Value} {Problems}},
+        journal = {SIAM Journal on Numerical Analysis},
+        author = {Russell, R. D. and Christiansen, J.},
+        year = {1978},
+        file = {Russell and Christiansen - 1978 - Adaptive Mesh Selection Strategies for Solving Bou.pdf:/Users/AXLRSN/Zotero/storage/HKU27A4T/Russell and Christiansen - 1978 - Adaptive Mesh Selection Strategies for Solving Bou.pdf:application/pdf},
+    }
+        """
+        Base.@kwdef struct $(alg){N, J <: BVPJacobianAlgorithm, T} <: AbstractFIRK
+            nlsolve::N = nothing
+            jac_alg::J = BVPJacobianAlgorithm()
+            nested_nlsolve::Bool = false
+            nest_tol::Number = 0.0
+            defect_threshold::T = 0.1
+            max_num_subintervals::Int = 3000
+        end
+        $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){
+            N, J, T}(
+            nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals)
+    end
+end
+
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIb$(stage)")
+
+    @eval begin
+        """
+            $($alg)(; nlsolve = NewtonRaphson(), jac_alg = BVPJacobianAlgorithm(), nested_nlsolve = false, nest_tol = 0.0,
+                    defect_threshold = 0.1, max_num_subintervals = 3000)
+
+        $($stage)th stage LobattoIIIb method.
+
+        ## Keyword Arguments
+
+        - `nlsolve`: Internal Nonlinear solver. Any solver which conforms to the SciML
+          `NonlinearProblem` interface can be used. Note that any autodiff argument for
+          the solver will be ignored and a custom jacobian algorithm will be used.
+        - `jac_alg`: Jacobian Algorithm used for the nonlinear solver. Defaults to
+          `BVPJacobianAlgorithm()`, which automatically decides the best algorithm to
+          use based on the input types and problem type.
+          - For `TwoPointBVProblem`, only `diffmode` is used (defaults to
+            `AutoSparse(AutoForwardDiff)` if possible else `AutoSparse(AutoFiniteDiff)`).
+          - For `BVProblem`, `bc_diffmode` and `nonbc_diffmode` are used. For
+            `nonbc_diffmode` defaults to `AutoSparse(AutoForwardDiff)` if possible else
+            `AutoSparse(AutoFiniteDiff)`. For `bc_diffmode`, defaults to `AutoForwardDiff` if
+            possible else `AutoFiniteDiff`.
+        - `nested_nlsolve`: Whether or not to use a nested nonlinear solve for the 
+        implicit FIRK step. Defaults to `true`. If set to `false`, the FIRK stages are 
+        solved as a part of the global residual. The general recommendation is to choose 
+        `true` for larger problems and `false` for smaller ones.
+        - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to 
+        `NonlinearSolve` automatically selecting the tolerance.
+        - `defect_threshold`: Threshold for defect control.
+        - `max_num_subintervals`: Number of maximal subintervals, default as 3000.
+
+      !!! note
+          For type-stability, the chunksizes for ForwardDiff ADTypes in
+          `BVPJacobianAlgorithm` must be provided.
+
+      ## References
+            Reference for Lobatto and Radau methods:
+
+                @Inbook{Jay2015,
+                author="Jay, Laurent O.",
+                editor="Engquist, Bj{\"o}rn",
+                title="Lobatto Methods",
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                year="2015",
+                publisher="Springer Berlin Heidelberg",
+                }
+                @incollection{engquist_radau_2015,
+                author = {Hairer, Ernst and Wanner, Gerhard},
+                title = {Radau {Methods}},
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                publisher = {Springer Berlin Heidelberg},
+                editor="Engquist, Bj{\"o}rn",
+                year = {2015},
+            }
+      References for implementation of defect control, based on the `bvp5c` solver in MATLAB:
+
+        @article{shampine_solving_nodate,
+        title = {Solving {Boundary} {Value} {Problems} for {Ordinary} {Diﬀerential} {Equations} in {Matlab} with bvp4c
+        author = {Shampine, Lawrence F and Kierzenka, Jacek and Reichelt, Mark W},
+        year = {2000},
+    }
+
+    @article{kierzenka_bvp_2008,
+        title = {A {BVP} {Solver} that {Controls} {Residual} and {Error}},
+        author = {Kierzenka, J and Shampine, L F},
+        year = {2008},
+    }
+
+    @article{russell_adaptive_1978,
+        title = {Adaptive {Mesh} {Selection} {Strategies} for {Solving} {Boundary} {Value} {Problems}},
+        journal = {SIAM Journal on Numerical Analysis},
+        author = {Russell, R. D. and Christiansen, J.},
+        year = {1978},
+        file = {Russell and Christiansen - 1978 - Adaptive Mesh Selection Strategies for Solving Bou.pdf:/Users/AXLRSN/Zotero/storage/HKU27A4T/Russell and Christiansen - 1978 - Adaptive Mesh Selection Strategies for Solving Bou.pdf:application/pdf},
+    }
+        """
+        Base.@kwdef struct $(alg){N, J <: BVPJacobianAlgorithm, T} <: AbstractFIRK
+            nlsolve::N = nothing
+            jac_alg::J = BVPJacobianAlgorithm()
+            nested_nlsolve::Bool = false
+            nest_tol::Number = 0.0
+            defect_threshold::T = 0.1
+            max_num_subintervals::Int = 3000
+        end
+        $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){
+            N, J, T}(
+            nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals)
+    end
+end
+
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIc$(stage)")
+
+    @eval begin
+        """
+            $($alg)(; nlsolve = NewtonRaphson(), jac_alg = BVPJacobianAlgorithm(), nested_nlsolve = false, nest_tol = 0.0,
+                    defect_threshold = 0.1, max_num_subintervals = 3000)
+
+        $($stage)th stage LobattoIIIc method.
+
+        ## Keyword Arguments
+
+        - `nlsolve`: Internal Nonlinear solver. Any solver which conforms to the SciML
+          `NonlinearProblem` interface can be used. Note that any autodiff argument for
+          the solver will be ignored and a custom jacobian algorithm will be used.
+        - `jac_alg`: Jacobian Algorithm used for the nonlinear solver. Defaults to
+          `BVPJacobianAlgorithm()`, which automatically decides the best algorithm to
+          use based on the input types and problem type.
+          - For `TwoPointBVProblem`, only `diffmode` is used (defaults to
+            `AutoSparse(AutoForwardDiff)` if possible else `AutoSparse(AutoFiniteDiff)`).
+          - For `BVProblem`, `bc_diffmode` and `nonbc_diffmode` are used. For
+            `nonbc_diffmode` defaults to `AutoSparse(AutoForwardDiff)` if possible else
+            `AutoSparse(AutoFiniteDiff)`. For `bc_diffmode`, defaults to `AutoForwardDiff` if
+            possible else `AutoFiniteDiff`.
+        - `nested_nlsolve`: Whether or not to use a nested nonlinear solve for the 
+        implicit FIRK step. Defaults to `true`. If set to `false`, the FIRK stages are 
+        solved as a part of the global residual. The general recommendation is to choose 
+        `true` for larger problems and `false` for smaller ones.
+        - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to 
+        `NonlinearSolve` automatically selecting the tolerance.
+        - `defect_threshold`: Threshold for defect control.
+        - `max_num_subintervals`: Number of maximal subintervals, default as 3000.
+
+      !!! note
+          For type-stability, the chunksizes for ForwardDiff ADTypes in
+          `BVPJacobianAlgorithm` must be provided.
+
+      ## References
+            Reference for Lobatto and Radau methods:
+
+                @Inbook{Jay2015,
+                author="Jay, Laurent O.",
+                editor="Engquist, Bj{\"o}rn",
+                title="Lobatto Methods",
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                year="2015",
+                publisher="Springer Berlin Heidelberg",
+                }
+                @incollection{engquist_radau_2015,
+                author = {Hairer, Ernst and Wanner, Gerhard},
+                title = {Radau {Methods}},
+                booktitle = {Encyclopedia of {Applied} and {Computational} {Mathematics}},
+                publisher = {Springer Berlin Heidelberg},
+                editor="Engquist, Bj{\"o}rn",
+                year = {2015},
+            }
+      References for implementation of defect control, based on the `bvp5c` solver in MATLAB:
+
+        @article{shampine_solving_nodate,
+        title = {Solving {Boundary} {Value} {Problems} for {Ordinary} {Diﬀerential} {Equations} in {Matlab} with bvp4c
+        author = {Shampine, Lawrence F and Kierzenka, Jacek and Reichelt, Mark W},
+        year = {2000},
+    }
+
+    @article{kierzenka_bvp_2008,
+        title = {A {BVP} {Solver} that {Controls} {Residual} and {Error}},
+        author = {Kierzenka, J and Shampine, L F},
+        year = {2008},
+    }
+
+    @article{russell_adaptive_1978,
+        title = {Adaptive {Mesh} {Selection} {Strategies} for {Solving} {Boundary} {Value} {Problems}},
+        journal = {SIAM Journal on Numerical Analysis},
+        author = {Russell, R. D. and Christiansen, J.},
+        year = {1978},
+        file = {Russell and Christiansen - 1978 - Adaptive Mesh Selection Strategies for Solving Bou.pdf:/Users/AXLRSN/Zotero/storage/HKU27A4T/Russell and Christiansen - 1978 - Adaptive Mesh Selection Strategies for Solving Bou.pdf:application/pdf},
+    }
+        """
+        Base.@kwdef struct $(alg){N, J <: BVPJacobianAlgorithm, T} <: AbstractFIRK
+            nlsolve::N = nothing
+            jac_alg::J = BVPJacobianAlgorithm()
+            nested_nlsolve::Bool = false
+            nest_tol::Number = 0.0
+            defect_threshold::T = 0.1
+            max_num_subintervals::Int = 3000
+        end
+        $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){
+            N, J, T}(
+            nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals)
+    end
+end
+
+# FIRK Algorithms that don't use adaptivity
+const FIRKNoAdaptivity = Union{LobattoIIIb2, RadauIIa1, LobattoIIIc2}
+
 """
     BVPM2(; max_num_subintervals = 3000, method_choice = 4, diagnostic_output = 1,
         error_control = 1, singular_term = nothing)
diff --git a/src/collocation.jl b/src/collocation.jl
index 9c14fc3f..589db587 100644
--- a/src/collocation.jl
+++ b/src/collocation.jl
@@ -1,8 +1,13 @@
-function Φ!(residual, cache::MIRKCache, y, u, p = cache.p)
+function Φ!(residual, cache::Union{MIRKCache, FIRKCacheExpand}, y, u, p = cache.p)
     return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f,
         cache.TU, y, u, p, cache.mesh, cache.mesh_dt, cache.stage)
 end
 
+function Φ!(residual, cache::FIRKCacheNested, y, u, p = cache.p)
+    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU,
+        y, u, p, cache.mesh, cache.mesh_dt, cache.stage, cache)
+end
+
 @views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau,
         y, u, p, mesh, mesh_dt, stage::Int)
     (; c, v, x, b) = TU
@@ -29,11 +34,118 @@ end
     end
 end
 
-function Φ(cache::MIRKCache, y, u, p = cache.p)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{false},
+        y, u, p, mesh, mesh_dt, stage::Int)
+    (; c, a, b) = TU
+    tmp1 = get_tmp(fᵢ_cache, u)
+    K = get_tmp(k_discrete[1], u) # Not optimal # TODO
+    T = eltype(u)
+    ctr = 1
+
+    for i in eachindex(mesh_dt)
+        h = mesh_dt[i]
+        yᵢ = get_tmp(y[ctr], u)
+        yᵢ₊₁ = get_tmp(y[ctr + stage + 1], u)
+
+        # Load interpolation residual
+        for j in 1:stage
+            K[:, j] = get_tmp(y[ctr + j], u)
+        end
+
+        # Update interpolation residual
+        for r in 1:stage
+            @. tmp1 = yᵢ
+            __maybe_matmul!(tmp1, K, a[:, r], h, T(1))
+            f!(residual[ctr + r], tmp1, p, mesh[i] + c[r] * h)
+            residual[ctr + r] .-= K[:, r]
+        end
+
+        # Update mesh point residual
+        residᵢ = residual[ctr]
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, K, b, -h, T(1))
+        ctr += stage + 1
+    end
+end
+
+function FIRK_nlsolve!(res, K, p_nlsolve, f!, TU::FIRKTableau{true}, p_f!)
+    (; a, c, s) = TU
+    mesh_i = p_nlsolve[1]
+    h = p_nlsolve[2]
+    yᵢ = @view p_nlsolve[3:end]
+
+    T = promote_type(eltype(K), eltype(yᵢ))
+    tmp1 = similar(K, T, size(K, 1))
+
+    for r in 1:s
+        @. tmp1 = yᵢ
+        __maybe_matmul!(tmp1, K, a[:, r], h, T(1))
+
+        f!(@view(res[:, r]), tmp1, p_f!, mesh_i + c[r] * h)
+        @views res[:, r] .-= K[:, r]
+    end
+    return nothing
+end
+
+function FIRK_nlsolve(K, p_nlsolve, f!, TU::FIRKTableau{true}, p_f!)
+    (; a, c, s) = TU
+    mesh_i = p_nlsolve[1]
+    h = p_nlsolve[2]
+    yᵢ = @view p_nlsolve[3:end]
+
+    T = promote_type(eltype(K), eltype(yᵢ))
+    tmp1 = similar(K, T, size(K, 1))
+    res = similar(K, T, size(K))
+
+    for r in 1:s
+        @. tmp1 = yᵢ
+        __maybe_matmul!(tmp1, K, a[:, r], h, T(1))
+        @views res[:, r] = f!(tmp1, p_f!, mesh_i + c[r] * h)
+        @views res[:, r] .-= K[:, r]
+    end
+    return res
+end
+
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{true},
+        y, u, p, mesh, mesh_dt, stage::Int, cache)
+    (; b) = TU
+    (; nest_prob, nest_tol) = cache
+
+    T = eltype(u)
+    nestprob_p = vcat(T(mesh[1]), T(mesh_dt[1]), get_tmp(y[1], u))
+    nest_nlsolve_alg = __concrete_nonlinearsolve_algorithm(nest_prob, cache.alg.nlsolve)
+
+    for i in eachindex(k_discrete)
+        residᵢ = residual[i]
+        h = mesh_dt[i]
+
+        yᵢ = get_tmp(y[i], u)
+        yᵢ₊₁ = get_tmp(y[i + 1], u)
+
+        nestprob_p[1] = T(mesh[i])
+        nestprob_p[2] = T(mesh_dt[i])
+        nestprob_p[3:end] = yᵢ
+
+        K = get_tmp(k_discrete[i], u)
+
+        _nestprob = remake(nest_prob, p = nestprob_p)
+        nestsol = solve(_nestprob, nest_nlsolve_alg; abstol = nest_tol)
+        @. K = nestsol.u
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, nestsol.u, b, -h, T(1))
+    end
+end
+
+function Φ(cache::Union{MIRKCache, FIRKCacheExpand}, y, u, p = cache.p)
     return Φ(cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU,
         y, u, p, cache.mesh, cache.mesh_dt, cache.stage)
 end
 
+function Φ(cache::FIRKCacheNested, y, u, p = cache.p)
+    return Φ(cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y,
+        u, p, cache.mesh, cache.mesh_dt, cache.stage, cache)
+end
+
 @views function Φ(
         fᵢ_cache, k_discrete, f, TU::MIRKTableau, y, u, p, mesh, mesh_dt, stage::Int)
     (; c, v, x, b) = TU
@@ -61,3 +173,70 @@ end
 
     return residuals
 end
+
+@views function Φ(
+        fᵢ_cache, k_discrete, f, TU::FIRKTableau{false}, y, u, p, mesh, mesh_dt, stage::Int)
+    (; c, a, b) = TU
+    residuals = [__similar(yᵢ) for yᵢ in y[1:(end - 1)]]
+    tmp1 = get_tmp(fᵢ_cache, u)
+    K = get_tmp(k_discrete[1], u) # Not optimal # TODO
+    T = eltype(u)
+    ctr = 1
+
+    for i in eachindex(mesh_dt)
+        h = mesh_dt[i]
+        yᵢ = get_tmp(y[ctr], u)
+        yᵢ₊₁ = get_tmp(y[ctr + stage + 1], u)
+
+        # Load interpolation residual
+        for j in 1:stage
+            K[:, j] = get_tmp(y[ctr + j], u)
+        end
+
+        # Update interpolation residual
+        for r in 1:stage
+            @. tmp1 = yᵢ
+            __maybe_matmul!(tmp1, K, a[:, r], h, T(1))
+            residuals[ctr + r] = f(tmp1, p, mesh[i] + c[r] * h)
+            residuals[ctr + r] .-= K[:, r]
+        end
+
+        # Update mesh point residual
+        residᵢ = residuals[ctr]
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, K, b, -h, T(1))
+        ctr += stage + 1
+    end
+    return residuals
+end
+
+@views function Φ(fᵢ_cache, k_discrete, f!, TU::FIRKTableau{true},
+        y, u, p, mesh, mesh_dt, stage::Int, cache)
+    (; b) = TU
+    (; nest_prob, alg, nest_tol) = cache
+
+    residuals = [__similar(yᵢ) for yᵢ in y[1:(end - 1)]]
+
+    T = eltype(u)
+    nestprob_p = vcat(T(mesh[1]), T(mesh_dt[1]), get_tmp(y[1], u))
+    nest_nlsolve_alg = __concrete_nonlinearsolve_algorithm(nest_prob, alg.nlsolve)
+
+    for i in eachindex(k_discrete)
+        residᵢ = residuals[i]
+        h = mesh_dt[i]
+
+        yᵢ = get_tmp(y[i], u)
+        yᵢ₊₁ = get_tmp(y[i + 1], u)
+
+        nestprob_p[1] = T(mesh[i])
+        nestprob_p[2] = T(mesh_dt[i])
+        nestprob_p[3:end] = yᵢ
+
+        _nestprob = remake(nest_prob, p = nestprob_p)
+        nestsol = solve(_nestprob, nest_nlsolve_alg, abstol = nest_tol)
+
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, nestsol.u, b, -h, T(1))
+    end
+    return residuals
+end
diff --git a/src/interpolation.jl b/src/interpolation.jl
index 9b1c59d6..6fe00495 100644
--- a/src/interpolation.jl
+++ b/src/interpolation.jl
@@ -1,3 +1,4 @@
+# MIRK Interpolation
 @concrete struct MIRKInterpolation <: AbstractDiffEqInterpolation
     t
     u
@@ -17,10 +18,48 @@ function (id::MIRKInterpolation)(val, tvals, idxs, deriv, p, continuity::Symbol
     return
 end
 
-# FIXME: Fix the interpolation outside the tspan
+# FIRK Expand Interpolation
+struct FIRKExpandInterpolation{T1, T2} <: AbstractDiffEqInterpolation
+    t::T1
+    u::T2
+    cache
+end
+
+function DiffEqBase.interp_summary(interp::FIRKExpandInterpolation)
+    return "FIRK Order $(interp.cache.order) Interpolation"
+end
+
+function (id::FIRKExpandInterpolation)(tvals, idxs, deriv, p, continuity::Symbol = :left)
+    interpolation(tvals, id, idxs, deriv, p, continuity)
+end
+
+function (id::FIRKExpandInterpolation)(
+        val, tvals, idxs, deriv, p, continuity::Symbol = :left)
+    interpolation!(val, tvals, id, idxs, deriv, p, continuity)
+end
+
+# FIRK Nested Interpolation
+struct FIRKNestedInterpolation{T1, T2} <: AbstractDiffEqInterpolation
+    t::T1
+    u::T2
+    cache
+end
 
-@inline function interpolation(
-        tvals, id::I, idxs, deriv::D, p, continuity::Symbol = :left) where {I, D}
+function DiffEqBase.interp_summary(interp::FIRKNestedInterpolation)
+    return "FIRK Order $(interp.cache.order) Interpolation"
+end
+
+function (id::FIRKNestedInterpolation)(tvals, idxs, deriv, p, continuity::Symbol = :left)
+    interpolation(tvals, id, idxs, deriv, p, continuity)
+end
+
+function (id::FIRKNestedInterpolation)(
+        val, tvals, idxs, deriv, p, continuity::Symbol = :left)
+    interpolation!(val, tvals, id, idxs, deriv, p, continuity)
+end
+
+@inline function interpolation(tvals, id::MIRKInterpolation, idxs, deriv::D,
+        p, continuity::Symbol = :left) where {D}
     (; t, u, cache) = id
     tdir = sign(t[end] - t[1])
     idx = sortperm(tvals, rev = tdir < 0)
@@ -41,8 +80,8 @@ end
     return DiffEqArray(vals, tvals)
 end
 
-@inline function interpolation!(
-        vals, tvals, id::I, idxs, deriv::D, p, continuity::Symbol = :left) where {I, D}
+@inline function interpolation!(vals, tvals, id::MIRKInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
     (; t, cache) = id
     tdir = sign(t[end] - t[1])
     idx = sortperm(tvals, rev = tdir < 0)
@@ -54,8 +93,8 @@ end
     end
 end
 
-@inline function interpolation(
-        tval::Number, id::I, idxs, deriv::D, p, continuity::Symbol = :left) where {I, D}
+@inline function interpolation(tval::Number, id::MIRKInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
     z = similar(id.cache.fᵢ₂_cache)
     interpolant!(z, id.cache, tval, id.cache.mesh, id.cache.mesh_dt, deriv)
     return idxs !== nothing ? z[idxs] : z
@@ -79,3 +118,136 @@ end
     z = similar(dz)
     sum_stages!(z, dz, cache, w, w′, i)
 end
+
+@inline function interpolation(tvals, id::FIRKNestedInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
+    (; t, u, cache) = id
+    tdir = sign(t[end] - t[1])
+    idx = sortperm(tvals, rev = tdir < 0)
+
+    if idxs isa Number
+        vals = Vector{eltype(first(u))}(undef, length(tvals))
+    elseif idxs isa AbstractVector
+        vals = Vector{Vector{eltype(first(u))}}(undef, length(tvals))
+    else
+        vals = Vector{eltype(u)}(undef, length(tvals))
+    end
+
+    for j in idx
+        z = similar(cache.fᵢ₂_cache)
+        interp_eval!(z, id.cache, tvals[j], id.cache.mesh, id.cache.mesh_dt)
+        vals[j] = idxs !== nothing ? z[idxs] : z
+    end
+    return DiffEqArray(vals, tvals)
+end
+
+@inline function interpolation!(vals, tvals, id::FIRKNestedInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
+    (; t, cache) = id
+    tdir = sign(t[end] - t[1])
+    idx = sortperm(tvals, rev = tdir < 0)
+
+    for j in idx
+        z = similar(cache.fᵢ₂_cache)
+        interp_eval!(z, id.cache, tvals[j], id.cache.mesh, id.cache.mesh_dt)
+        vals[j] = z
+    end
+end
+
+@inline function interpolation(tval::Number, id::FIRKNestedInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
+    z = similar(id.cache.fᵢ₂_cache)
+    interp_eval!(z, id.cache, tval, id.cache.mesh, id.cache.mesh_dt)
+    return idxs !== nothing ? z[idxs] : z
+end
+
+## Expanded
+@inline function interpolation(tvals, id::FIRKExpandInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
+    (; t, u, cache) = id
+    tdir = sign(t[end] - t[1])
+    idx = sortperm(tvals, rev = tdir < 0)
+
+    if idxs isa Number
+        vals = Vector{eltype(first(u))}(undef, length(tvals))
+    elseif idxs isa AbstractVector
+        vals = Vector{Vector{eltype(first(u))}}(undef, length(tvals))
+    else
+        vals = Vector{eltype(u)}(undef, length(tvals))
+    end
+
+    for j in idx
+        z = similar(cache.fᵢ₂_cache)
+        interp_eval!(z, id.cache, tvals[j], id.cache.mesh, id.cache.mesh_dt)
+        vals[j] = idxs !== nothing ? z[idxs] : z
+    end
+    return DiffEqArray(vals, tvals)
+end
+
+@inline function interpolation!(vals, tvals, id::FIRKExpandInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
+    (; t, cache) = id
+    tdir = sign(t[end] - t[1])
+    idx = sortperm(tvals, rev = tdir < 0)
+
+    for j in idx
+        z = similar(cache.fᵢ₂_cache)
+        interp_eval!(z, j, id.cache, tvals[j], id.cache.mesh, id.cache.mesh_dt)
+        vals[j] = z
+    end
+end
+
+@inline function interpolation(tval::Number, id::FIRKExpandInterpolation, idxs,
+        deriv::D, p, continuity::Symbol = :left) where {D}
+    z = similar(id.cache.fᵢ₂_cache)
+    interp_eval!(z, 1, id.cache, tval, id.cache.mesh, id.cache.mesh_dt)
+    return idxs !== nothing ? z[idxs] : z
+end
+
+@inline __build_interpolation(cache::MIRKCache, u::AbstractVector) = MIRKInterpolation(
+    cache.mesh, u, cache)
+@inline __build_interpolation(cache::FIRKCacheExpand, u::AbstractVector) = FIRKExpandInterpolation(
+    cache.mesh, u, cache)
+@inline __build_interpolation(cache::FIRKCacheNested, u::AbstractVector) = FIRKNestedInterpolation(
+    cache.mesh, u, cache)
+
+"""
+    get_ymid(yᵢ, coeffs, K, h)
+
+Gets the interpolated middle value for a RK method, see bvp5c paper.
+"""
+function get_ymid(yᵢ, coeffs, K, h)
+    res = copy(yᵢ)
+    for i in axes(K, 2)
+        res .+= h .* K[:, i] .* coeffs[i]
+    end
+    return res
+end
+
+"""
+    s_constraints(M, h)
+
+Form the quartic interpolation constraint matrix, see bvp5c paper.
+"""
+function s_constraints(M, h)
+    t = vec(repeat([0.0, 1.0 * h, 0.5 * h, 0.0, 1.0 * h, 0.5 * h], 1, M))
+    A = zeros(6 * M, 6 * M)
+    for i in 1:6
+        row_start = (i - 1) * M + 1
+        if i <= 3
+            for k in 0:(M - 1)
+                for j in 1:6
+                    A[row_start + k, j + k * 6] = t[i + k * 6]^(j - 1)
+                end
+            end
+        else
+            for k in 0:(M - 1)
+                for j in 1:6
+                    A[row_start + k, j + k * 6] = j == 1.0 ? 0.0 :
+                                                  (j - 1) * t[i + k * 6]^(j - 2)
+                end
+            end
+        end
+    end
+    return A
+end
diff --git a/src/lobatto_tableaus.jl b/src/lobatto_tableaus.jl
new file mode 100644
index 00000000..f36fbefb
--- /dev/null
+++ b/src/lobatto_tableaus.jl
@@ -0,0 +1,281 @@
+# LobattoIIIa
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIa$(stage)")
+    f = Symbol("constructLobattoIIIa$(stage)")
+    @eval constructRK(_alg::$(alg), ::Type{T}) where {T} = $(f)(T, _alg.nested_nlsolve)
+end
+
+function constructLobattoIIIa2(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 2
+    a = [0 0
+         1//2 1//2]
+    a = permutedims(a, (2, 1))
+    c = [0, 1]
+    b = [1 // 2, 1 // 2]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0; -0.5 0.5]
+    τ_star = 0.5
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIa3(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 3
+    a = [0 0 0
+         5//24 1//3 -1//24
+         1//6 2//3 1//6]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2, 1]
+    b = [1 // 6, 2 // 3, 1 // 6]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0;
+               -1.5 2.0 -0.5;
+               0.6666666666666666 -1.3333333333333333 0.6666666666666666]
+    τ_star = 0.21132486540518713
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIa4(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 4
+    a = [0 0 0 0
+         (11 + Rational(√5))//120 (25 - Rational(√5))//120 (25 - 13 * Rational(√5))//120 (-1 + Rational(√5))//120
+         (11 - Rational(√5))//120 (25 + 13 * Rational(√5))//120 (25 + Rational(√5))//120 (-1 - Rational(√5))//120
+         1//12 5//12 5//12 1//12]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2 - Rational(√5) // 10, 1 // 2 + Rational(√5) // 10, 1]
+    b = [1 // 12, 5 // 12, 5 // 12, 1 // 12]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0 0.0;
+               -3.0 4.04508497187474 -1.545084971874738 0.5;
+               3.3333333333333357 -6.423503277082812 4.756836610416144 -1.6666666666666674;
+               -1.25 2.7950849718747395 -2.795084971874738 1.25]
+    τ_star = 0.5
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIa5(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 5
+    a = [0 0 0 0 0
+         (119 + 3 * Rational(√21))//1960 (343 - 9 * Rational(√21))//2520 (392 - 96 * Rational(√21))//2205 (343 - 69 * Rational(√21))//2520 (-21 + 3 * Rational(√21))//1960
+         13//320 (392 + 105 * Rational(√21))//2880 8//45 (392 - 105 * Rational(√21))//2880 3//320
+         (119 - 3 * Rational(√21))//1960 (343 + 69 * Rational(√21))//2520 (392 + 96 * Rational(√21))//2205 (343 + 9 * Rational(√21))//2520 (-21 - 3 * Rational(√21))//1960
+         1//20 49//180 16//45 49//180 1//20]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2 - Rational(√21) // 14, 1 // 2, 1 // 2 + Rational(√21) // 14, 1]
+    b = [1 // 20, 49 // 180, 16 // 45, 49 // 180, 1 // 20]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0 0.0 0.0;
+               -5.0 6.756502488724233 -2.6666666666666603 1.4101641779424228 -0.5;
+               10.0 -18.957449421892882 14.222222222222186 -8.264772800329274 3.0;
+               -8.75 19.006502488724166 -18.666666666666604 13.660164177942388 -5.25;
+               2.8 -6.533333333333296 7.466666666666636 -6.533333333333315 2.8]
+    τ_star = 0.33000947820757126
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+# LobattoIIIb
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIb$(stage)")
+    f = Symbol("constructLobattoIIIb$(stage)")
+    @eval constructRK(_alg::$(alg), ::Type{T}) where {T} = $(f)(T, _alg.nested_nlsolve)
+end
+
+function constructLobattoIIIb2(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 2
+    a = [1//2 0
+         1//2 0]
+    a = permutedims(a, (2, 1))
+    c = [0, 1]
+    b = [1 // 2, 1 // 2]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0; -0.5 0.5]
+    τ_star = 0.5
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIb3(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 3
+    a = [1//6 -1//6 0
+         1//6 1//3 0
+         1//6 5//6 0]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2, 1]
+    b = [1 // 6, 2 // 3, 1 // 6]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0;
+               -1.5 2.0 -0.5;
+               0.6666666666666666 -1.3333333333333333 0.6666666666666666]
+    τ_star = 0.21132486540518713
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIb4(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 4
+    a = [1//12 (-1 - Rational(√5))//24 (-1 + Rational(√5))//24 0
+         1//12 (25 + Rational(√5))//120 (25 - 13 * Rational(√5))//120 0
+         1//12 (25 + 13 * Rational(√5))//120 (25 - Rational(√5))//120 0
+         1//12 (11 - Rational(√5))//24 (11 + Rational(√5))//24 0]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2 - Rational(√5) // 10, 1 // 2 + Rational(√5) // 10, 1]
+    b = [1 // 12, 5 // 12, 5 // 12, 1 // 12]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0 0.0;
+               -3.0 4.04508497187474 -1.545084971874738 0.5;
+               3.3333333333333357 -6.423503277082812 4.756836610416144 -1.6666666666666674;
+               -1.25 2.7950849718747395 -2.795084971874738 1.25]
+    τ_star = 0.5
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIb5(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 5
+    a = [1//20 (-7 - Rational(√21))//120 1//15 (-7 + Rational(√21))//120 0
+         1//20 (343 + 9 * Rational(√21))//2520 (56 - 15 * Rational(√21))//315 (343 - 69 * Rational(√21))//2520 0
+         1//20 (49 + 12 * Rational(√21))//360 8//45 (49 - 12 * Rational(√21))//360 0
+         1//20 (343 + 69 * Rational(√21))//2520 (56 + 15 * Rational(√21))//315 (343 - 9 * Rational(√21))//2520 0
+         1//20 (119 - 3 * Rational(√21))//360 13//45 (119 + 3 * Rational(√21))//360 0]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2 - Rational(√21) // 14, 1 // 2, 1 // 2 + Rational(√21) // 14, 1]
+    b = [1 // 20, 49 // 180, 16 // 45, 49 // 180, 1 // 20]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0 0.0 0.0;
+               -5.0 6.756502488724233 -2.6666666666666603 1.4101641779424228 -0.5;
+               10.0 -18.957449421892882 14.222222222222186 -8.264772800329274 3.0;
+               -8.75 19.006502488724166 -18.666666666666604 13.660164177942388 -5.25;
+               2.8 -6.533333333333296 7.466666666666636 -6.533333333333315 2.8]
+    τ_star = 0.33000947820757126
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+# LobattoIIIc
+for stage in (2, 3, 4, 5)
+    alg = Symbol("LobattoIIIc$(stage)")
+    f = Symbol("constructLobattoIIIc$(stage)")
+    @eval constructRK(_alg::$(alg), ::Type{T}) where {T} = $(f)(T, _alg.nested_nlsolve)
+end
+
+function constructLobattoIIIc2(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 2
+    a = [1//2 -1//2
+         1//2 1//2]
+    a = permutedims(a, (2, 1))
+    c = [0, 1]
+    b = [1 // 2, 1 // 2]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0; -0.5 0.5]
+    τ_star = 0.5
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIc3(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 3
+    a = [1//6 -1//3 1//6
+         1//6 5//12 -1//12
+         1//6 2//3 1//6]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2, 1]
+    b = [1 // 6, 2 // 3, 1 // 6]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0
+               -1.5 2.0 -0.5
+               0.6666666666666666 -1.3333333333333333 0.6666666666666666]
+    τ_star = 0.7886751345948129 #done
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIc4(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 4
+    a = [1//12 -Rational(sqrt(5))//12 Rational(sqrt(5))//12 -1//12
+         1//12 1//4 (10 - 7 * Rational(sqrt(5)))//60 Rational(sqrt(5))//60
+         1//12 (10 + 7 * Rational(sqrt(5)))//60 1//4 -Rational(sqrt(5))//60
+         1//12 5//12 5//12 1//12]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2 - Rational(sqrt(5)) // 10, 1 // 2 + Rational(sqrt(5)) // 10, 1]
+    b = [1 // 12, 5 // 12, 5 // 12, 1 // 12]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0 0.0;
+               -3.0000000000000013 4.04508497187474 -1.545084971874738 0.5000000000000003;
+               3.3333333333333357 -6.423503277082812 4.756836610416144 -1.6666666666666674;
+               -1.2500000000000009 2.7950849718747395 -2.795084971874738 1.2500000000000002]
+    τ_star = 0.5
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructLobattoIIIc5(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 5
+    a = [1//20 -7//60 2//15 -7//60 1//20
+         1//20 29//180 (47 - 15 * Rational(sqrt(21)))//315 (203 - 30 * Rational(sqrt(21)))//1260 -3//140
+         1//20 (329 + 105 * Rational(sqrt(21)))//2880 73//360 (329 - 105 * Rational(sqrt(21)))//2880 3//160
+         1//20 (203 + 30 * Rational(sqrt(21)))//1260 (47 + 15 * Rational(sqrt(21)))//315 29//180 -3//140
+         1//20 49//180 16//45 49//180 1//20]
+    a = permutedims(a, (2, 1))
+    c = [0, 1 // 2 - Rational(sqrt(21)) // 14, 1 // 2, 1 // 2 + Rational(sqrt(21)) // 14, 1]
+    b = [1 // 20, 49 // 180, 16 // 45, 49 // 180, 1 // 20]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0 0.0 0.0 0.0 0.0;
+               -4.9999999999999964 6.756502488724233 -2.6666666666666603 1.4101641779424228 -0.4999999999999985;
+               9.999999999999977 -18.957449421892882 14.222222222222186 -8.264772800329274 2.999999999999991;
+               -8.749999999999961 19.006502488724166 -18.666666666666604 13.660164177942388 -5.249999999999985;
+               2.7999999999999803 -6.533333333333296 7.466666666666636 -6.533333333333315 2.7999999999999927]
+    τ_star = 0.6699905217924309
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
diff --git a/src/radau_tableaus.jl b/src/radau_tableaus.jl
new file mode 100644
index 00000000..c5c6d6ee
--- /dev/null
+++ b/src/radau_tableaus.jl
@@ -0,0 +1,139 @@
+# RadauIIa
+for stage in (1, 2, 3, 5, 7)
+    alg = Symbol("RadauIIa$(stage)")
+    f = Symbol("constructRadauIIa$(stage)")
+    @eval constructRK(_alg::$(alg), ::Type{T}) where {T} = $(f)(T, _alg.nested_nlsolve)
+end
+
+function constructRadauIIa1(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 1
+    a = [1]
+    c = [1]
+    b = [1]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.0;;]
+    τ_star = 0.0
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructRadauIIa2(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 2
+    a = [5//12 -1//12
+         3//4 1//4]
+    a = permutedims(a, (2, 1))
+    c = [1 // 3, 1]
+    b = [3 // 4, 1 // 4]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.5 -0.5;
+               -0.75 0.75]
+    τ_star = 0.0
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructRadauIIa3(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 3
+    a = [11 // 45-7 * Rational(√6) // 360 37 // 225-169 * Rational(√6) // 1800 -2 // 225+Rational(√6) // 75
+         37 // 225+169 * Rational(√6) // 1800 11 // 45+7 * Rational(√6) // 360 -2 // 225-Rational(√6) // 75
+         4 // 9-Rational(√6) // 36 4 // 9+Rational(√6) // 36 1//9]
+    a = permutedims(a, (2, 1))
+    c = [2 // 5 - Rational(√6) // 10, 2 // 5 + Rational(√6) // 10, 1]
+    b = [4 // 9 - Rational(√6) // 36, 4 // 9 + Rational(√6) // 36, 1 // 9]
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.5580782047249224 -0.8914115380582555 0.33333333333333315;
+               -1.9869472213484427 3.320280554681775 -1.3333333333333326;
+               0.8052720793239877 -1.9163831904350983 1.1111111111111107]
+    τ_star = 0.0
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructRadauIIa5(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 5
+    c = [0.5710419611451768219312e-01, 0.2768430136381238276800e+00,
+        0.5835904323689168200567e+00, 0.8602401356562194478479e+00, 1.0]
+    c_p = [1 c[1] c[1]^2 c[1]^3 c[1]^4
+           1 c[2] c[2]^2 c[2]^3 c[2]^4
+           1 c[3] c[3]^2 c[3]^3 c[3]^4
+           1 c[4] c[4]^2 c[4]^3 c[4]^4
+           1 c[5] c[5]^2 c[5]^3 c[5]^4]
+
+    c_q = [c[1] c[1]^2/2 c[1]^3/3 c[1]^4/4 c[1]^5/5
+           c[2] c[2]^2/2 c[2]^3/3 c[2]^4/4 c[2]^5/5
+           c[3] c[3]^2/2 c[3]^3/3 c[3]^4/4 c[3]^5/5
+           c[4] c[4]^2/2 c[4]^3/3 c[4]^4/4 c[4]^5/5
+           c[5] c[5]^2/2 c[5]^3/3 c[5]^4/4 c[5]^5/5]
+
+    a = c_q / c_p
+    b = a[5, :]
+    a = permutedims(a, (2, 1))
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.5864079001863276 -1.0081178814983707 0.7309748661597844 -0.5092648848477398 0.19999999999999882;
+               -5.939631780296778 10.780734269705029 -8.510911966412747 6.069809477004479 -2.3999999999999866;
+               9.977775909015945 -24.44476872321262 26.826271868712684 -20.759279054515957 8.399999999999956;
+               -7.7637202739307325 21.986586239050933 -29.484574687947546 26.461708722827275 -11.19999999999994;
+               2.282881805816463 -7.033077888895508 10.750066442463563 -11.039870359384485 5.0399999999999725]
+    τ_star = 0.0
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
+
+function constructRadauIIa7(::Type{T}, nested::Bool) where {T}
+    # RK coefficients tableau
+    s = 7
+    c = [0.2931642715978489197205e-01, 0.1480785996684842918500e+00,
+        0.3369846902811542990971e+00, 0.5586715187715501320814e+00,
+        0.7692338620300545009169e+00, 0.9269456713197411148519e+00, 1.0]
+    c_p = [1 c[1] c[1]^2 c[1]^3 c[1]^4 c[1]^5 c[1]^6
+           1 c[2] c[2]^2 c[2]^3 c[2]^4 c[2]^5 c[2]^6
+           1 c[3] c[3]^2 c[3]^3 c[3]^4 c[3]^5 c[3]^6
+           1 c[4] c[4]^2 c[4]^3 c[4]^4 c[4]^5 c[4]^6
+           1 c[5] c[5]^2 c[5]^3 c[5]^4 c[5]^5 c[5]^6
+           1 c[6] c[6]^2 c[6]^3 c[6]^4 c[6]^5 c[6]^6
+           1 c[7] c[7]^2 c[7]^3 c[7]^4 c[7]^5 c[7]^6]
+
+    c_q = [c[1] c[1]^2/2 c[1]^3/3 c[1]^4/4 c[1]^5/5 c[1]^6/6 c[1]^7/7
+           c[2] c[2]^2/2 c[2]^3/3 c[2]^4/4 c[2]^5/5 c[2]^6/6 c[2]^7/7
+           c[3] c[3]^2/2 c[3]^3/3 c[3]^4/4 c[3]^5/5 c[3]^6/6 c[3]^7/7
+           c[4] c[4]^2/2 c[4]^3/3 c[4]^4/4 c[4]^5/5 c[4]^6/6 c[4]^7/7
+           c[5] c[5]^2/2 c[5]^3/3 c[5]^4/4 c[5]^5/5 c[5]^6/6 c[5]^7/7
+           c[6] c[6]^2/2 c[6]^3/3 c[6]^4/4 c[6]^5/5 c[6]^6/6 c[6]^7/7
+           c[7] c[7]^2/2 c[7]^3/3 c[7]^4/4 c[7]^5/5 c[7]^6/6 c[7]^7/7]
+
+    a = c_q / c_p
+
+    b = a[7, :]
+
+    a = permutedims(a, (2, 1))
+
+    # Coefficients for constructing q and zeros of p(x) polynomial in bvp5c paper
+    q_coeff = [1.5940642185610567 -1.036553752196515 0.79382172349084 -0.6325776522499784 0.4976107136030369 -0.3592223940655934 0.14285714285715354;
+               -11.867354907681566 21.895554926684994 -18.27080167953177 14.932007947071362 -11.86801681989069 8.60718196191934 -3.4285714285716815;
+               42.56843764866437 -104.58826140801058 119.44339657888817 -106.16674936195061 87.24612664353391 -64.21723581541276 25.714285714287506;
+               -82.61199090291213 232.1402530759895 -317.623307111846 322.4188516023496 -280.67924303496534 212.06972208567558 -85.71428571429115;
+               88.92081439942109 -269.58788741224174 412.88176210349144 -468.1955191607566 439.58250988570325 -345.03025124419685 141.42857142857926;
+               -49.9855578088297 158.9633239184469 -262.58964790376285 324.5017915419607 -328.4240528650557 270.6770002601034 -113.14285714286237;
+               11.456081588332877 -37.62732723293888 65.57712817877311 -86.63425000191717 93.83554041389372 -81.6275811094104 35.020408163266595]
+    τ_star = 0.0
+
+    TU = FIRKTableau(Int64(s), T.(a), T.(c), T.(b), nested)
+    ITU = FIRKInterpTableau(T.(q_coeff), T.(τ_star), Int64(s), nested)
+    return TU, ITU
+end
diff --git a/src/solve/firk.jl b/src/solve/firk.jl
new file mode 100644
index 00000000..3cb7851d
--- /dev/null
+++ b/src/solve/firk.jl
@@ -0,0 +1,497 @@
+@concrete struct FIRKCacheNested{iip, T}
+    order::Int                 # The order of MIRK method
+    stage::Int                 # The state of MIRK method
+    M::Int                     # The number of equations
+    in_size
+    f
+    bc
+    prob                       # BVProblem
+    problem_type               # StandardBVProblem
+    p                          # Parameters
+    alg                        # FIRK methods
+    TU                         # FIRK Tableau
+    ITU                        # FIRK Interpolation Tableau
+    bcresid_prototype
+    # Everything below gets resized in adaptive methods
+    mesh                       # Discrete mesh
+    mesh_dt                    # Step size
+    k_discrete                 # Stage information associated with the discrete Runge-Kutta method
+    y
+    y₀
+    residual
+    # The following 2 caches are never resized
+    fᵢ_cache
+    fᵢ₂_cache
+    defect
+    nest_prob
+    nest_tol
+    resid_size
+    kwargs
+end
+
+Base.eltype(::FIRKCacheNested{iip, T}) where {iip, T} = T
+@concrete struct FIRKCacheExpand{iip, T}
+    order::Int                 # The order of MIRK method
+    stage::Int                 # The state of MIRK method
+    M::Int                     # The number of equations
+    in_size
+    f
+    bc
+    prob                       # BVProblem
+    problem_type               # StandardBVProblem
+    p                          # Parameters
+    alg                        # FIRK methods
+    TU                         # FIRK Tableau
+    ITU                        # FIRK Interpolation Tableau
+    bcresid_prototype
+    # Everything below gets resized in adaptive methods
+    mesh                       # Discrete mesh
+    mesh_dt                    # Step size
+    k_discrete                 # Stage information associated with the discrete Runge-Kutta method
+    y
+    y₀
+    residual
+    # The following 2 caches are never resized
+    fᵢ_cache
+    fᵢ₂_cache
+    defect
+    resid_size
+    kwargs
+end
+Base.eltype(::FIRKCacheExpand{iip, T}) where {iip, T} = T
+
+function extend_y(y, N::Int, stage::Int)
+    y_extended = similar(y.u, (N - 1) * (stage + 1) + 1)
+    y_extended[1] = y.u[1]
+    let ctr1 = 2
+        for i in 2:N
+            for j in 1:(stage + 1)
+                y_extended[(ctr1)] = y.u[i]
+                ctr1 += 1
+            end
+        end
+    end
+    return VectorOfArray(y_extended)
+end
+
+function shrink_y(y, N, M, stage)
+    y_shrink = similar(y, N)
+    y_shrink[1] = y[1]
+    let ctr = stage + 2
+        for i in 2:N
+            y_shrink[i] = y[ctr]
+            ctr += (stage + 1)
+        end
+    end
+    return y_shrink
+end
+
+function SciMLBase.__init(prob::BVProblem, alg::AbstractFIRK; dt = 0.0,
+        abstol = 1e-3, adaptive = true, kwargs...)
+    if alg.nested_nlsolve
+        return init_nested(
+            prob, alg; dt = dt, abstol = abstol, adaptive = adaptive, kwargs...)
+    else
+        return init_expanded(
+            prob, alg; dt = dt, abstol = abstol, adaptive = adaptive, kwargs...)
+    end
+end
+
+function init_nested(prob::BVProblem, alg::AbstractFIRK; dt = 0.0,
+        abstol = 1e-3, adaptive = true, kwargs...)
+    @set! alg.jac_alg = concrete_jacobian_algorithm(alg.jac_alg, prob, alg)
+
+    iip = isinplace(prob)
+    if adaptive && isa(alg, FIRKNoAdaptivity)
+        error("Algorithm doesn't support adaptivity. Please choose a higher order algorithm.")
+    end
+
+    t₀, t₁ = prob.tspan
+    ig, T, M, Nig, X = __extract_problem_details(prob; dt, check_positive_dt = true)
+    mesh = __extract_mesh(prob.u0, t₀, t₁, Nig)
+    mesh_dt = diff(mesh)
+
+    chunksize = pickchunksize(M * (Nig - 1))
+    __alloc = @closure x -> __maybe_allocate_diffcache(vec(x), chunksize, alg.jac_alg)
+
+    fᵢ_cache = __alloc(zero(X))
+    fᵢ₂_cache = vec(zero(X))
+
+    # Don't flatten this here, since we need to expand it later if needed
+    y₀ = __initial_guess_on_mesh(prob.u0, mesh, prob.p)
+
+    y = __alloc.(copy.(y₀.u))
+    TU, ITU = constructRK(alg, T)
+    stage = alg_stage(alg)
+
+    k_discrete = [__maybe_allocate_diffcache(
+                      __similar(X, M, stage), chunksize, alg.jac_alg) for _ in 1:Nig]
+
+    bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
+
+    residual = if iip
+        if prob.problem_type isa TwoPointBVProblem
+            vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+        else
+            vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+        end
+    else
+        nothing
+    end
+
+    defect = VectorOfArray([__similar(X, ifelse(adaptive, M, 0)) for _ in 1:Nig])
+
+    # Transform the functions to handle non-vector inputs
+    bcresid_prototype = __vec(bcresid_prototype)
+    f, bc = if X isa AbstractVector
+        prob.f, prob.f.bc
+    elseif iip
+        vecf! = @closure (du, u, p, t) -> __vec_f!(du, u, p, t, prob.f, size(X))
+        vecbc! = if !(prob.problem_type isa TwoPointBVProblem)
+            @closure (r, u, p, t) -> __vec_bc!(r, u, p, t, prob.f.bc, resid₁_size, size(X))
+        else
+            (
+                @closure((r, u, p)->__vec_bc!(
+                    r, u, p, first(prob.f.bc), resid₁_size[1], size(X))),
+                @closure((r, u, p)->__vec_bc!(
+                    r, u, p, last(prob.f.bc), resid₁_size[2], size(X))))
+        end
+        vecf!, vecbc!
+    else
+        vecf = @closure (u, p, t) -> __vec_f(u, p, t, prob.f, size(X))
+        vecbc = if !(prob.problem_type isa TwoPointBVProblem)
+            @closure (u, p, t) -> __vec_bc(u, p, t, prob.f.bc, size(X))
+        else
+            (@closure((u, p)->__vec_bc(u, p, first(prob.f.bc), size(X))),
+                @closure((u, p)->__vec_bc(u, p, last(prob.f.bc), size(X))))
+        end
+        vecf, vecbc
+    end
+
+    prob_ = !(prob.u0 isa AbstractArray) ? remake(prob; u0 = X) : prob
+
+    if isa(prob.u0, AbstractArray) && eltype(prob.u0) <: AbstractVector
+        u0_mat = hcat(prob.u0...)
+        avg_u0 = vec(sum(u0_mat, dims = 2)) / size(u0_mat, 2)
+    else
+        avg_u0 = prob.u0
+    end
+
+    K0 = repeat(avg_u0, 1, stage) # Somewhat arbitrary initialization of K
+
+    nestprob_p = zeros(T, M + 2)
+    nest_tol = (alg.nest_tol > eps(T) ? alg.nest_tol : nothing)
+
+    if iip
+        nestprob = NonlinearProblem(
+            (res, K, p) -> FIRK_nlsolve!(res, K, p, f, TU, prob.p), K0, nestprob_p)
+    else
+        nestprob = NonlinearProblem(
+            (K, p) -> FIRK_nlsolve(K, p, f, TU, prob.p), K0, nestprob_p)
+    end
+
+    return FIRKCacheNested{iip, T}(
+        alg_order(alg), stage, M, size(X), f, bc, prob_, prob.problem_type,
+        prob.p, alg, TU, ITU, bcresid_prototype, mesh, mesh_dt,
+        k_discrete, y, y₀, residual, fᵢ_cache, fᵢ₂_cache, defect, nestprob,
+        nest_tol, resid₁_size, (; abstol, dt, adaptive, kwargs...))
+end
+
+function init_expanded(prob::BVProblem, alg::AbstractFIRK; dt = 0.0,
+        abstol = 1e-3, adaptive = true, kwargs...)
+    @set! alg.jac_alg = concrete_jacobian_algorithm(alg.jac_alg, prob, alg)
+
+    if adaptive && isa(alg, FIRKNoAdaptivity)
+        error("Algorithm $(alg) doesn't support adaptivity. Please choose a higher order algorithm.")
+    end
+
+    iip = isinplace(prob)
+
+    t₀, t₁ = prob.tspan
+    ig, T, M, Nig, X = __extract_problem_details(prob; dt, check_positive_dt = true)
+    mesh = __extract_mesh(prob.u0, t₀, t₁, Nig)
+    mesh_dt = diff(mesh)
+
+    TU, ITU = constructRK(alg, T)
+    stage = alg_stage(alg)
+
+    chunksize = pickchunksize(M + M * Nig * (stage + 1))
+    __alloc = @closure x -> __maybe_allocate_diffcache(vec(x), chunksize, alg.jac_alg)
+
+    fᵢ_cache = __alloc(zero(X)) # Runtime dispatch
+    fᵢ₂_cache = vec(zero(X))
+
+    # Don't flatten this here, since we need to expand it later if needed
+    _y₀ = __initial_guess_on_mesh(prob.u0, mesh, prob.p)
+    y₀ = extend_y(_y₀, Nig + 1, stage)
+    y = __alloc.(copy.(y₀.u)) # Runtime dispatch
+
+    k_discrete = [__maybe_allocate_diffcache(
+                      __similar(X, M, stage), chunksize, alg.jac_alg) for _ in 1:Nig] # Runtime dispatch
+
+    bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
+
+    residual = if iip
+        if prob.problem_type isa TwoPointBVProblem
+            vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+        else
+            vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+        end
+    else
+        nothing
+    end
+
+    defect = VectorOfArray([similar(X, ifelse(adaptive, M, 0)) for _ in 1:Nig])
+
+    # Transform the functions to handle non-vector inputs
+    bcresid_prototype = __vec(bcresid_prototype)
+    f, bc = if X isa AbstractVector
+        prob.f, prob.f.bc
+    elseif iip
+        vecf! = @closure (du, u, p, t) -> __vec_f!(du, u, p, t, prob.f, size(X))
+        vecbc! = if !(prob.problem_type isa TwoPointBVProblem)
+            @closure (r, u, p, t) -> __vec_bc!(r, u, p, t, prob.f.bc, resid₁_size, size(X))
+        else
+            (
+                @closure((r, u, p)->__vec_bc!(
+                    r, u, p, first(prob.f.bc)[1], resid₁_size[1], size(X))),
+                @closure ((r, u, p) -> __vec_bc!(
+                    r, u, p, last(prob.f.bc)[2], resid₁_size[2], size(X))))
+        end
+        vecf!, vecbc!
+    else
+        vecf = @closure (u, p, t) -> __vec_f(u, p, t, prob.f, size(X))
+        vecbc = if !(prob.problem_type isa TwoPointBVProblem)
+            @closure (u, p, t) -> __vec_bc(u, p, t, prob.f.bc, size(X))
+        else
+            (@closure((u, p)->__vec_bc(u, p, first(prob.f.bc), size(X))),
+                @closure((u, p)->__vec_bc(u, p, last(prob.f.bc), size(X))))
+        end
+        vecf, vecbc
+    end
+
+    prob_ = !(prob.u0 isa AbstractArray) ? remake(prob; u0 = X) : prob
+
+    return FIRKCacheExpand{iip, T}(
+        alg_order(alg), stage, M, size(X), f, bc, prob_, prob.problem_type, prob.p,
+        alg, TU, ITU, bcresid_prototype, mesh, mesh_dt, k_discrete, y, y₀, residual,
+        fᵢ_cache, fᵢ₂_cache, defect, resid₁_size, (; abstol, dt, adaptive, kwargs...))
+end
+
+"""
+    __expand_cache!(cache::FIRKCache)
+
+After redistributing or halving the mesh, this function expands the required vectors to
+match the length of the new mesh.
+"""
+function __expand_cache!(cache::FIRKCacheExpand)
+    Nₙ = length(cache.mesh)
+    __append_similar!(cache.k_discrete, Nₙ - 1, cache.M, cache.TU)
+    __append_similar!(cache.y, Nₙ, cache.M, cache.TU)
+    __append_similar!(cache.y₀, Nₙ, cache.M, cache.TU)
+    __append_similar!(cache.residual, Nₙ, cache.M, cache.TU)
+    __append_similar!(cache.defect, Nₙ - 1, cache.M, cache.TU)
+    return cache
+end
+
+function SciMLBase.solve!(cache::FIRKCacheExpand)
+    (abstol, adaptive, _), kwargs = __split_mirk_kwargs(; cache.kwargs...)
+    info::ReturnCode.T = ReturnCode.Success
+
+    # We do the first iteration outside the loop to preserve type-stability of the
+    # `original` field of the solution
+    sol_nlprob, info, defect_norm = __perform_firk_iteration(
+        cache, abstol, adaptive; kwargs...)
+
+    if adaptive
+        while SciMLBase.successful_retcode(info) && defect_norm > abstol
+            sol_nlprob, info, defect_norm = __perform_firk_iteration(
+                cache, abstol, adaptive; kwargs...)
+        end
+    end
+
+    # sync y and y0 caches
+    for i in axes(cache.y₀, 1)
+        cache.y[i].du .= cache.y₀.u[i]
+    end
+
+    u = shrink_y([reshape(y, cache.in_size) for y in cache.y₀],
+        length(cache.mesh), cache.M, alg_stage(cache.alg))
+
+    interpolation = __build_interpolation(cache, u)
+    odesol = DiffEqBase.build_solution(
+        cache.prob, cache.alg, cache.mesh, u; interp = interpolation, retcode = info)
+    return __build_solution(cache.prob, odesol, sol_nlprob)
+end
+
+function __perform_firk_iteration(
+        cache::FIRKCacheExpand, abstol, adaptive; nlsolve_kwargs = (;), kwargs...)
+    nlprob = __construct_nlproblem(cache, vec(cache.y₀))
+    nlsolve_alg = __concrete_nonlinearsolve_algorithm(nlprob, cache.alg.nlsolve)
+    sol_nlprob = __solve(
+        nlprob, nlsolve_alg; abstol, kwargs..., nlsolve_kwargs..., alias_u0 = true)
+    recursive_unflatten!(cache.y₀, sol_nlprob.u)
+
+    defect_norm = 2 * abstol
+
+    # Early terminate if non-adaptive
+    adaptive || return sol_nlprob, sol_nlprob.retcode, defect_norm
+
+    info = sol_nlprob.retcode
+
+    if info == ReturnCode.Success # Nonlinear Solve was successful
+        defect_norm = defect_estimate!(cache)
+        # The defect is greater than 10%, the solution is not acceptable
+        defect_norm > cache.alg.defect_threshold && (info = ReturnCode.Failure)
+    end
+
+    if info == ReturnCode.Success # Nonlinear Solve Successful and defect norm is acceptable
+        if defect_norm > abstol
+            # We construct a new mesh to equidistribute the defect
+            mesh, mesh_dt, _, info = mesh_selector!(cache)
+            if info == ReturnCode.Success
+                __append_similar!(cache.y₀, length(cache.mesh), cache.M, cache.TU)
+                for (i, m) in enumerate(cache.mesh)
+                    interp_eval!(cache.y₀.u[i], cache, m, mesh, mesh_dt)
+                end
+                __expand_cache!(cache)
+            end
+        end
+    else # Something bad happened
+        # We cannot obtain a solution for the current mesh
+        if 2 * (length(cache.mesh) - 1) > cache.alg.max_num_subintervals
+            # New mesh would be too large
+            info = ReturnCode.Failure
+        else
+            half_mesh!(cache)
+            __expand_cache!(cache)
+            recursivefill!(cache.y₀, 0)
+            info = ReturnCode.Success # Force a restart
+        end
+    end
+
+    return sol_nlprob, info, defect_norm
+end
+
+function __construct_nlproblem(
+        cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C,
+        loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
+    (; nlsolve, jac_alg) = cache.alg
+    N = length(cache.mesh)
+    TU, ITU = constructRK(cache.alg, eltype(y))
+    (; s) = TU
+
+    resid_bc = cache.bcresid_prototype
+    L = length(resid_bc)
+    resid_collocation = __similar(y, cache.M * (N - 1) * (TU.s + 1))
+
+    loss_bcₚ = (iip ? __Fix3 : Base.Fix2)(loss_bc, cache.p)
+    loss_collocationₚ = (iip ? __Fix3 : Base.Fix2)(loss_collocation, cache.p)
+
+    sd_bc = jac_alg.bc_diffmode isa AutoSparse ? SymbolicsSparsityDetection() :
+            NoSparsityDetection()
+    cache_bc = __sparse_jacobian_cache(
+        Val(iip), jac_alg.bc_diffmode, sd_bc, loss_bcₚ, resid_bc, y)
+
+    sd_collocation = if jac_alg.nonbc_diffmode isa AutoSparse
+        if L < cache.M
+            # For underdetermined problems we use sparse since we don't have banded qr
+            colored_matrix = __generate_sparse_jacobian_prototype(
+                cache, cache.problem_type, y, y, cache.M, N)
+            J_full_band = nothing
+            __sparsity_detection_alg(ColoredMatrix(
+                sparse(colored_matrix.M), colored_matrix.row_colorvec,
+                colored_matrix.col_colorvec))
+        else
+            block_size = cache.M * (s + 2)
+            J_full_band = BandedMatrix(
+                Ones{eltype(y)}(
+                    L + cache.M * (s + 1) * (N - 1), cache.M * (s + 1) * (N - 1) + cache.M),
+                (block_size, block_size))
+            __sparsity_detection_alg(__generate_sparse_jacobian_prototype(
+                cache, cache.problem_type, y, y, cache.M, N))
+        end
+    else
+        J_full_band = nothing
+        NoSparsityDetection()
+    end
+
+    cache_collocation = __sparse_jacobian_cache(
+        Val(iip), jac_alg.nonbc_diffmode, sd_collocation,
+        loss_collocationₚ, resid_collocation, y)
+
+    J_bc = zero(init_jacobian(cache_bc))
+    J_c = zero(init_jacobian(cache_collocation))
+
+    if J_full_band === nothing
+        jac_prototype = vcat(J_bc, J_c)
+    else
+        jac_prototype = AlmostBandedMatrix{eltype(cache)}(J_full_band, J_bc)
+    end
+
+    jac = if iip
+        @closure (J, u, p) -> __mirk_mpoint_jacobian!(
+            J, J_c, u, jac_alg.bc_diffmode, jac_alg.nonbc_diffmode, cache_bc,
+            cache_collocation, loss_bcₚ, loss_collocationₚ, resid_bc, resid_collocation, L)
+    else
+        @closure (u, p) -> __mirk_mpoint_jacobian(
+            jac_prototype, J_c, u, jac_alg.bc_diffmode, jac_alg.nonbc_diffmode,
+            cache_bc, cache_collocation, loss_bcₚ, loss_collocationₚ, L)
+    end
+
+    resid_prototype = vcat(resid_bc, resid_collocation)
+
+    resid_prototype = vcat(resid_bc, resid_collocation)
+    nlf = __unsafe_nonlinearfunction{iip}(loss; resid_prototype, jac, jac_prototype)
+
+    return __internal_nlsolve_problem(cache.prob, resid_prototype, y, nlf, y, cache.p)
+end
+
+function __construct_nlproblem(
+        cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C,
+        loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
+    (; nlsolve, jac_alg) = cache.alg
+    N = length(cache.mesh)
+    TU, ITU = constructRK(cache.alg, eltype(y))
+    (; s) = TU
+
+    lossₚ = iip ? ((du, u) -> loss(du, u, cache.p)) : (u -> loss(u, cache.p))
+
+    resid_collocation = __similar(y, cache.M * (N - 1) * (TU.s + 1))
+
+    resid = vcat(
+        @view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]), resid_collocation,
+        @view(cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]))
+    L = length(cache.bcresid_prototype)
+
+    sd = if jac_alg.nonbc_diffmode isa AutoSparse
+        block_size = cache.M * (s + 2)
+        J_full_band = BandedMatrix(
+            Ones{eltype(y)}(
+                L + cache.M * (s + 1) * (N - 1), cache.M * (s + 1) * (N - 1) + cache.M),
+            (block_size, block_size))
+        __sparsity_detection_alg(__generate_sparse_jacobian_prototype(
+            cache, cache.problem_type,
+            @view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]),
+            @view(cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]),
+            cache.M, N))
+    else
+        J_full_band = nothing
+        NoSparsityDetection()
+    end
+
+    diffcache = __sparse_jacobian_cache(Val(iip), jac_alg.diffmode, sd, lossₚ, resid, y)
+    jac_prototype = zero(init_jacobian(diffcache))
+
+    jac = if iip
+        @closure (J, u, p) -> __mirk_2point_jacobian!(
+            J, u, jac_alg.diffmode, diffcache, lossₚ, resid)
+    else
+        @closure (u, p) -> __mirk_2point_jacobian(
+            u, jac_prototype, jac_alg.diffmode, diffcache, lossₚ)
+    end
+
+    resid_prototype = copy(resid)
+    nlf = __unsafe_nonlinearfunction{iip}(loss; resid_prototype, jac, jac_prototype)
+    return __internal_nlsolve_problem(cache.prob, resid_prototype, y, nlf, y, cache.p)
+end
diff --git a/src/solve/mirk.jl b/src/solve/mirk.jl
index d9dcb2c0..e84a001f 100644
--- a/src/solve/mirk.jl
+++ b/src/solve/mirk.jl
@@ -54,8 +54,8 @@ function SciMLBase.__init(prob::BVProblem, alg::AbstractMIRK; dt = 0.0,
     TU, ITU = constructMIRK(alg, T)
     stage = alg_stage(alg)
 
-    k_discrete = [__maybe_allocate_diffcache(__similar(X, N, stage), chunksize, alg.jac_alg)
-                  for _ in 1:Nig]
+    k_discrete = [__maybe_allocate_diffcache(
+                      __similar(X, N, stage), chunksize, alg.jac_alg) for _ in 1:Nig]
     k_interp = VectorOfArray([similar(X, N, ITU.s_star - stage) for _ in 1:Nig])
 
     bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
@@ -127,11 +127,21 @@ function __expand_cache!(cache::MIRKCache)
     return cache
 end
 
+function __expand_cache!(cache::FIRKCacheNested)
+    Nₙ = length(cache.mesh)
+    __append_similar!(cache.k_discrete, Nₙ - 1, cache.M)
+    __append_similar!(cache.y, Nₙ, cache.M)
+    __append_similar!(cache.y₀, Nₙ, cache.M)
+    __append_similar!(cache.residual, Nₙ, cache.M)
+    __append_similar!(cache.defect, Nₙ - 1, cache.M)
+    return cache
+end
+
 function __split_mirk_kwargs(; abstol, dt, adaptive = true, kwargs...)
     return ((abstol, adaptive, dt), (; abstol, adaptive, kwargs...))
 end
 
-function SciMLBase.solve!(cache::MIRKCache)
+function SciMLBase.solve!(cache::Union{MIRKCache, FIRKCacheNested})
     (abstol, adaptive, _), kwargs = __split_mirk_kwargs(; cache.kwargs...)
     info::ReturnCode.T = ReturnCode.Success
 
@@ -149,13 +159,15 @@ function SciMLBase.solve!(cache::MIRKCache)
 
     u = recursivecopy(cache.y₀)
 
-    odesol = DiffEqBase.build_solution(cache.prob, cache.alg, cache.mesh, u.u;
-        interp = MIRKInterpolation(cache.mesh, u.u, cache), retcode = info)
+    interpolation = __build_interpolation(cache, u.u)
+
+    odesol = DiffEqBase.build_solution(
+        cache.prob, cache.alg, cache.mesh, u.u; interp = interpolation, retcode = info)
     return __build_solution(cache.prob, odesol, sol_nlprob)
 end
 
-function __perform_mirk_iteration(
-        cache::MIRKCache, abstol, adaptive::Bool; nlsolve_kwargs=(;), kwargs...)
+function __perform_mirk_iteration(cache::Union{MIRKCache, FIRKCacheNested}, abstol,
+        adaptive::Bool; nlsolve_kwargs = (;), kwargs...)
     nlprob = __construct_nlproblem(cache, vec(cache.y₀))
     nlsolve_alg = __concrete_nonlinearsolve_algorithm(nlprob, cache.alg.nlsolve)
     sol_nlprob = __solve(
@@ -204,7 +216,9 @@ function __perform_mirk_iteration(
 end
 
 # Constructing the Nonlinear Problem
-function __construct_nlproblem(cache::MIRKCache{iip}, y::AbstractVector) where {iip}
+function __construct_nlproblem(
+        cache::Union{MIRKCache{iip}, FIRKCacheNested{iip}, FIRKCacheExpand{iip}},
+        y::AbstractVector) where {iip}
     pt = cache.problem_type
 
     loss_bc = if iip
@@ -273,15 +287,16 @@ end
     return vcat(resid_bca, mapreduce(vec, vcat, resid_co), resid_bcb)
 end
 
-@views function __mirk_loss_bc!(
-        resid, u, p, pt, bc!::BC, y, mesh, cache::MIRKCache) where {BC}
+@views function __mirk_loss_bc!(resid, u, p, pt, bc!::BC, y, mesh,
+        cache::Union{MIRKCache, FIRKCacheNested, FIRKCacheExpand}) where {BC}
     y_ = recursive_unflatten!(y, u)
     soly_ = VectorOfArray(y_)
     eval_bc_residual!(resid, pt, bc!, soly_, p, mesh)
     return nothing
 end
 
-@views function __mirk_loss_bc(u, p, pt, bc!::BC, y, mesh, cache::MIRKCache) where {BC}
+@views function __mirk_loss_bc(u, p, pt, bc!::BC, y, mesh,
+        cache::Union{MIRKCache, FIRKCacheNested, FIRKCacheExpand}) where {BC}
     y_ = recursive_unflatten!(y, u)
     soly_ = VectorOfArray(y_)
     return eval_bc_residual(pt, bc!, soly_, p, mesh)
@@ -301,8 +316,9 @@ end
     return mapreduce(vec, vcat, resids)
 end
 
-function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
+function __construct_nlproblem(
+        cache::Union{MIRKCache{iip}, FIRKCacheNested{iip}}, y, loss_bc::BC,
+        loss_collocation::C, loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
     (; nlsolve, jac_alg) = cache.alg
     N = length(cache.mesh)
 
@@ -406,8 +422,9 @@ function __mirk_mpoint_jacobian(
     return J
 end
 
-function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
+function __construct_nlproblem(
+        cache::Union{MIRKCache{iip}, FIRKCacheNested{iip}}, y, loss_bc::BC,
+        loss_collocation::C, loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
     (; nlsolve, jac_alg) = cache.alg
     N = length(cache.mesh)
 
diff --git a/src/sparse_jacobians.jl b/src/sparse_jacobians.jl
index 27939969..016b3790 100644
--- a/src/sparse_jacobians.jl
+++ b/src/sparse_jacobians.jl
@@ -54,7 +54,7 @@ function __generate_sparse_jacobian_prototype(cache::MIRKCache, ya, yb, M, N)
 end
 
 function __generate_sparse_jacobian_prototype(
-        ::MIRKCache, ::StandardBVProblem, ya, yb, M, N)
+        ::Union{MIRKCache, FIRKCacheNested}, ::StandardBVProblem, ya, yb, M, N)
     fast_scalar_indexing(ya) ||
         error("Sparse Jacobians are only supported for Fast Scalar Index-able Arrays")
     J_c = BandedMatrix(Ones{eltype(ya)}(M * (N - 1), M * N), (1, 2M - 1))
@@ -62,7 +62,7 @@ function __generate_sparse_jacobian_prototype(
 end
 
 function __generate_sparse_jacobian_prototype(
-        ::MIRKCache, ::TwoPointBVProblem, ya, yb, M, N)
+        ::Union{MIRKCache, FIRKCacheNested}, ::TwoPointBVProblem, ya, yb, M, N)
     fast_scalar_indexing(ya) ||
         error("Sparse Jacobians are only supported for Fast Scalar Index-able Arrays")
     J₁ = length(ya) + length(yb) + M * (N - 1)
@@ -73,6 +73,107 @@ function __generate_sparse_jacobian_prototype(
     return ColoredMatrix(J, matrix_colors(J'), matrix_colors(J))
 end
 
+function __generate_sparse_jacobian_prototype(
+        cache::FIRKCacheExpand, ::StandardBVProblem, ya, yb, M, N)
+    (; TU) = cache
+    (; s) = TU
+    # Get number of nonzeros
+    block_size = M * (s + 1) * M * (s + 2)
+    l = (N - 1) * block_size
+    # Initialize Is and Js
+    Is = Vector{Int}(undef, l)
+    Js = Vector{Int}(undef, l)
+
+    # Fill Is and Js
+    row_size = M * (s + 1) * (N - 1)
+
+    idx = 1
+    i_start = 0
+    j_start = 0
+    i_step = M * (s + 1)
+    j_step = M * (s + 2)
+    for k in 1:(N - 1)
+        for i in 1:i_step
+            for j in 1:j_step
+                Is[idx] = i + i_start
+                Js[idx] = j + j_start
+                idx += 1
+            end
+        end
+        i_start += i_step
+        j_start += i_step
+    end
+
+    # Create sparse matrix from Is and Js
+    J_c = _sparse_like(Is, Js, ya, row_size, row_size + M)
+
+    col_colorvec = Vector{Int}(undef, size(J_c, 2))
+    for i in eachindex(col_colorvec)
+        col_colorvec[i] = mod1(i, (2 * M * (s + 1)) + M)
+    end
+    row_colorvec = Vector{Int}(undef, size(J_c, 1))
+    for i in eachindex(row_colorvec)
+        row_colorvec[i] = mod1(i, (2 * M * (s + 1)) + M)
+    end
+
+    return ColoredMatrix(J_c, row_colorvec, col_colorvec)
+end
+
+function __generate_sparse_jacobian_prototype(
+        cache::FIRKCacheExpand, ::TwoPointBVProblem, ya, yb, M, N)
+    (; TU) = cache
+    (; s) = TU
+    # Get number of nonzeros
+    block_size = M * (s + 1) * M * (s + 2)
+    l = (N - 1) * block_size + M * (s + 2) * (length(ya) + length(yb))
+    # Initialize Is and Js
+    Is = Vector{Int}(undef, l)
+    Js = Vector{Int}(undef, l)
+
+    # Fill Is and Js
+    row_size = M * (s + 1) * (N - 1)
+    idx = 1
+    i_start = 0
+    j_start = 0
+    i_step = M * (s + 1)
+    j_step = M * (s + 2)
+
+    # Fill first rows
+    for i in 1:length(ya)
+        for j in 1:j_step
+            Is[idx] = i
+            Js[idx] = j
+            idx += 1
+        end
+    end
+    i_start += length(ya)
+
+    for k in 1:(N - 1)
+        for i in 1:i_step
+            for j in 1:j_step
+                Is[idx] = i + i_start
+                Js[idx] = j + j_start
+                idx += 1
+            end
+        end
+        i_start += i_step
+        j_start += i_step
+    end
+    j_start -= i_step
+    #Fill last rows
+    for i in 1:length(yb)
+        for j in 1:j_step
+            Is[idx] = i + i_start
+            Js[idx] = j + j_start
+            idx += 1
+        end
+    end
+
+    # Create sparse matrix from Is and Js
+    J = _sparse_like(Is, Js, ya, row_size + length(ya) + length(yb), row_size + M)
+    return ColoredMatrix(J, matrix_colors(J'), matrix_colors(J))
+end
+
 # For Multiple Shooting
 """
     __generate_sparse_jacobian_prototype(::MultipleShooting, ::StandardBVProblem,
diff --git a/src/types.jl b/src/types.jl
index 448f1367..2000ea9b 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -28,6 +28,31 @@ struct MIRKInterpTableau{s, c, v, x, τ}
     end
 end
 
+# FIRK Method Tableaus
+struct FIRKTableau{nested, sType, aType, cType, bType}
+    """Discrete stages of RK formula"""
+    s::sType
+    a::aType
+    c::cType
+    b::bType
+
+    function FIRKTableau(s, a, c, b, nested)
+        @assert eltype(a) == eltype(c) == eltype(b)
+        return new{nested, typeof(s), typeof(a), typeof(c), typeof(b)}(s, a, c, b)
+    end
+end
+
+struct FIRKInterpTableau{nested, c, m}
+    q_coeff::c
+    τ_star::m
+    stage::Int
+
+    function FIRKInterpTableau(q_coeff, τ_star, stage, nested::Bool)
+        @assert eltype(q_coeff) == eltype(τ_star)
+        return new{nested, typeof(q_coeff), typeof(τ_star)}(q_coeff, τ_star, stage)
+    end
+end
+
 # Sparsity Detection
 @concrete struct BVPJacobianAlgorithm
     bc_diffmode
diff --git a/src/utils.jl b/src/utils.jl
index 1ecacd4f..0e8fa0bd 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -125,6 +125,37 @@ function __append_similar!(x::AbstractVector{<:MaybeDiffCache}, n, M)
     return x
 end
 
+function __append_similar!(x::AbstractVector{<:AbstractArray}, n, _, TU::FIRKTableau{false})
+    (; s) = TU
+    N = (n - 1) * (s + 1) + 1 - length(x)
+    N == 0 && return x
+    N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
+    append!(x, [similar(last(x)) for _ in 1:N])
+    return x
+end
+
+function __append_similar!(
+        x::AbstractVector{<:MaybeDiffCache}, n, M, TU::FIRKTableau{false})
+    (; s) = TU
+    N = (n - 1) * (s + 1) + 1 - length(x)
+    N == 0 && return x
+    N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
+    chunksize = isa(TU, FIRKTableau{false}) ? pickchunksize(M * (N + length(x) * (s + 1))) :
+                pickchunksize(M * (N + length(x)))
+    append!(x, [__maybe_allocate_diffcache(last(x), chunksize) for _ in 1:N])
+    return x
+end
+
+function __append_similar!(x::AbstractVectorOfArray, n, M, TU::FIRKTableau{false})
+    (; s) = TU
+    N = (n - 1) * (s + 1) + 1 - length(x)
+    N == 0 && return x
+    N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
+    append!(x, VectorOfArray([similar(last(x)) for _ in 1:N]))
+    return x
+end
+
+__append_similar!(::Nothing, n, _, _) = nothing
 function __append_similar!(x::AbstractVectorOfArray, n, _)
     N = n - length(x)
     N == 0 && return x
@@ -190,8 +221,7 @@ function __get_bcresid_prototype(::TwoPointBVProblem, prob::BVProblem, u)
     return prototype, size.(prototype)
 end
 function __get_bcresid_prototype(::StandardBVProblem, prob::BVProblem, u)
-    prototype = prob.f.bcresid_prototype !== nothing ? prob.f.bcresid_prototype :
-                zero(u)
+    prototype = prob.f.bcresid_prototype !== nothing ? prob.f.bcresid_prototype : zero(u)
     return prototype, size(prototype)
 end
 
@@ -200,8 +230,8 @@ end
     return zero(y)
 end
 
-@inline function __fill_like(v, x)
-    y = __similar(x)
+@inline function __fill_like(v, x, args...)
+    y = __similar(x, args...)
     fill!(y, v)
     return y
 end
@@ -385,5 +415,4 @@ end
 
 @inline (f::__Fix3{F})(a, b) where {F} = f.f(a, b, f.x)
 
-
 # convert every vector of vector to AbstractVectorOfArray, especially if them come from get_tmp of PreallocationTools.jl
diff --git a/test/firk/expanded/ensemble_tests.jl b/test/firk/expanded/ensemble_tests.jl
new file mode 100644
index 00000000..3e6b2930
--- /dev/null
+++ b/test/firk/expanded/ensemble_tests.jl
@@ -0,0 +1,72 @@
+@testitem "EnsembleProblem" begin
+    using Random
+
+    function ode!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -p[1] * u[1]
+    end
+
+    function bc!(residual, u, p, t)
+        residual[1] = u[:, 1][1] - 1.0
+        residual[2] = u[:, end][1]
+    end
+
+    prob_func(prob, i, repeat) = remake(prob, p = [rand()])
+
+    u0 = [0.0, 1.0]
+    tspan = (0, pi / 2)
+    p = [rand()]
+    bvp = BVProblem(ode!, bc!, u0, tspan, p)
+    ensemble_prob = EnsembleProblem(bvp; prob_func)
+    nlsolve = NewtonRaphson()
+    nested = false
+
+    @testset "$(solver)" for solver in (RadauIIa2, RadauIIa3, RadauIIa5, RadauIIa7) # RadauIIa1 doesn't have adaptivity
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+
+    @testset "$(solver)" for solver in (
+        LobattoIIIa2, LobattoIIIa3, LobattoIIIa4, LobattoIIIa5)
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+
+    @testset "$(solver)" for solver in (LobattoIIIb3, LobattoIIIb4, LobattoIIIb5) # LobattoIIIb2 doesn't have adaptivity
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+
+    @testset "$(solver)" for solver in (LobattoIIIc3, LobattoIIIc4, LobattoIIIc5) # LobattoIIIc2 doesn't have adaptivity
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+end
diff --git a/test/firk/expanded/firk_basic_tests.jl b/test/firk/expanded/firk_basic_tests.jl
new file mode 100644
index 00000000..8ef94712
--- /dev/null
+++ b/test/firk/expanded/firk_basic_tests.jl
@@ -0,0 +1,408 @@
+@testsetup module FIRKExpandedConvergenceTests
+
+using BoundaryValueDiffEq
+
+nested = false
+
+for stage in (2, 3, 4, 5)
+    s = Symbol("LobattoIIIa$(stage)")
+    @eval lobattoIIIa_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+for stage in (2, 3, 4, 5)
+    s = Symbol("LobattoIIIb$(stage)")
+    @eval lobattoIIIb_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+for stage in (2, 3, 4, 5)
+    s = Symbol("LobattoIIIc$(stage)")
+    @eval lobattoIIIc_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+for stage in (2, 3, 5, 7)
+    s = Symbol("RadauIIa$(stage)")
+    @eval radau_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+# First order test
+function f1!(du, u, p, t)
+    du[1] = u[2]
+    du[2] = 0
+end
+f1(u, p, t) = [u[2], 0]
+
+# Second order linear test
+function f2!(du, u, p, t)
+    du[1] = u[2]
+    du[2] = -u[1]
+end
+f2(u, p, t) = [u[2], -u[1]]
+
+function boundary!(residual, u, p, t)
+    residual[1] = u[:, 1][1] - 5
+    residual[2] = u[:, end][1]
+end
+boundary(u, p, t) = [u[:, 1][1] - 5, u[:, end][1]]
+
+function boundary_two_point_a!(resida, ua, p)
+    resida[1] = ua[1] - 5
+end
+function boundary_two_point_b!(residb, ub, p)
+    residb[1] = ub[1]
+end
+
+boundary_two_point_a(ua, p) = [ua[1] - 5]
+boundary_two_point_b(ub, p) = [ub[1]]
+
+# Not able to change the initial condition.
+# Hard coded solution.
+odef1! = ODEFunction(f1!, analytic = (u0, p, t) -> [5 - t, -1])
+odef1 = ODEFunction(f1, analytic = (u0, p, t) -> [5 - t, -1])
+
+odef2! = ODEFunction(f2!,
+    analytic = (u0, p, t) -> [
+        5 * (cos(t) - cot(5) * sin(t)), 5 * (-cos(t) * cot(5) - sin(t))])
+odef2 = ODEFunction(f2,
+    analytic = (u0, p, t) -> [
+        5 * (cos(t) - cot(5) * sin(t)), 5 * (-cos(t) * cot(5) - sin(t))])
+
+bcresid_prototype = (Array{Float64}(undef, 1), Array{Float64}(undef, 1))
+
+tspan = (0.0, 5.0)
+u0 = [5.0, -3.5]
+
+probArr = [BVProblem(odef1!, boundary!, u0, tspan, nlls = Val(false)),
+    BVProblem(odef1, boundary, u0, tspan, nlls = Val(false)),
+    BVProblem(odef2!, boundary!, u0, tspan, nlls = Val(false)),
+    BVProblem(odef2, boundary, u0, tspan, nlls = Val(false)),
+    TwoPointBVProblem(odef1!, (boundary_two_point_a!, boundary_two_point_b!),
+        u0, tspan; bcresid_prototype, nlls = Val(false)),
+    TwoPointBVProblem(odef1, (boundary_two_point_a, boundary_two_point_b),
+        u0, tspan; bcresid_prototype, nlls = Val(false)),
+    TwoPointBVProblem(odef2!, (boundary_two_point_a!, boundary_two_point_b!),
+        u0, tspan; bcresid_prototype, nlls = Val(false)),
+    TwoPointBVProblem(odef2, (boundary_two_point_a, boundary_two_point_b),
+        u0, tspan; bcresid_prototype, nlls = Val(false))]
+
+testTol = 0.3
+affineTol = 1e-2
+dts = 1 .// 2 .^ (5:-1:3)
+
+export probArr, testTol, affineTol, dts, lobattoIIIa_solver, lobattoIIIb_solver,
+       lobattoIIIc_solver, radau_solver
+
+end
+
+@testitem "Affineness" setup=[FIRKExpandedConvergenceTests] begin
+    using LinearAlgebra
+
+    @testset "Problem: $i" for i in (1, 2, 5, 6)
+        prob = probArr[i]
+
+        @testset "LobattoIIIa$stage" for stage in (2, 3, 4, 5)
+            @time sol = solve(
+                prob, lobattoIIIa_solver(Val(stage)); dt = 0.2, adaptive = false)
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+        @testset "LobattoIIIb$stage" for stage in (2, 3, 4, 5)
+            @time sol = solve(
+                prob, lobattoIIIb_solver(Val(stage)); dt = 0.2, adaptive = false)
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+        @testset "LobattoIIIc$stage" for stage in (2, 3, 4, 5)
+            @time sol = solve(
+                prob, lobattoIIIc_solver(Val(stage)); dt = 0.2, adaptive = false)
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+
+        @testset "RadauIIa$stage" for stage in (2, 3, 5, 7)
+            @time sol = solve(prob, radau_solver(Val(stage)); dt = 0.2, adaptive = false)
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+    end
+end
+
+@testitem "JET: Runtime Dispatches" setup=[FIRKExpandedConvergenceTests] begin
+    using JET
+
+    @testset "Problem: $i" for i in 1:8
+        prob = probArr[i]
+        @testset "LobattoIIIa$stage" for stage in (2, 3, 4, 5)
+            solver = lobattoIIIa_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2)))
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2)
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+        @testset "LobattoIIIb$stage" for stage in (2, 3, 4, 5)
+            solver = lobattoIIIb_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2)))
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2)
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+        @testset "LobattoIIIc$stage" for stage in (2, 3, 4, 5)
+            solver = lobattoIIIc_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2)))
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2)
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+        @testset "RadauIIa$stage" for stage in (2, 3, 5, 7)
+            solver = radau_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2)))
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2)
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+    end
+end
+
+@testitem "Convergence on Linear" setup=[FIRKExpandedConvergenceTests] begin
+    using LinearAlgebra, DiffEqDevTools
+
+    @testset "Problem: $i" for i in (3, 4, 7, 8)
+        prob = probArr[i]
+
+        @testset "LobattoIIIa$stage" for stage in (2, 3, 4, 5)
+            @time sim = test_convergence(
+                dts, prob, lobattoIIIa_solver(Val(stage)); abstol = 1e-8)
+            if (stage == 5) || (((i == 7) || (i == 8)) && stage == 4)
+                @test_broken sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            end
+        end
+
+        @testset "LobattoIIIb$stage" for stage in (2, 3, 4, 5)
+            @time sim = test_convergence(
+                dts, prob, lobattoIIIb_solver(Val(stage)); abstol = 1e-8, reltol = 1e-8)
+            if (stage == 5) || (stage == 4 && i == 8)
+                @test_broken sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            elseif stage == 4
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=0.5
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            end
+        end
+
+        @testset "LobattoIIIc$stage" for stage in (2, 3, 4, 5)
+            @time sim = test_convergence(
+                dts, prob, lobattoIIIc_solver(Val(stage)); abstol = 1e-8, reltol = 1e-8)
+            if (i == 3 && stage == 4) || (i == 4 && stage == 4)
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            elseif first(sim.errors[:final]) < 1e-12
+                @test_broken sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            end
+        end
+
+        @testset "RadauIIa$stage" for stage in (2, 3, 5, 7)
+            @time sim = test_convergence(
+                dts, prob, radau_solver(Val(stage)); abstol = 1e-8, reltol = 1e-8)
+            if first(sim.errors[:final]) < 1e-12
+                @test_broken sim.𝒪est[:final]≈2 * stage - 1 atol=testTol
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 1 atol=testTol
+            end
+        end
+    end
+end
+
+# FIXME: This is a really bad test. Needs interpolation
+@testitem "Simple Pendulum" begin
+    using StaticArrays
+
+    tspan = (0.0, π / 2)
+    function simplependulum!(du, u, p, t)
+        g, L, θ, dθ = 9.81, 1.0, u[1], u[2]
+        du[1] = dθ
+        du[2] = -(g / L) * sin(θ)
+    end
+
+    function bc_pendulum!(residual, u, p, t)
+        residual[1] = u[:, end ÷ 2][1] + π / 2 # the solution at the middle of the time span should be -pi/2
+        residual[2] = u[:, end][1] - π / 2 # the solution at the end of the time span should be pi/2
+    end
+
+    u0 = MVector{2}([pi / 2, pi / 2])
+    bvp1 = BVProblem(simplependulum!, bc_pendulum!, u0, tspan)
+
+    jac_alg = BVPJacobianAlgorithm(;
+        bc_diffmode = AutoFiniteDiff(), nonbc_diffmode = AutoSparse(AutoFiniteDiff()))
+    nl_solve = NewtonRaphson()
+    nested = false
+
+    # Using ForwardDiff might lead to Cache expansion warnings
+    @test_nowarn solve(bvp1, LobattoIIIa2(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIa3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIa4(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIa5(nl_solve, jac_alg; nested); dt = 0.005)
+
+    @test_nowarn solve(
+        bvp1, LobattoIIIb2(nl_solve, jac_alg; nested); dt = 0.005, adaptive = false)
+    @test_nowarn solve(bvp1, LobattoIIIb3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIb4(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIb5(nl_solve, jac_alg; nested); dt = 0.005)
+
+    @test_nowarn solve(
+        bvp1, LobattoIIIc2(nl_solve, jac_alg; nested); dt = 0.005, adaptive = false)
+    @test_nowarn solve(bvp1, LobattoIIIc3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIc4(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIc5(nl_solve, jac_alg; nested); dt = 0.005)
+
+    @test_nowarn solve(
+        bvp1, RadauIIa1(nl_solve, jac_alg; nested); dt = 0.005, adaptive = false)
+    @test_nowarn solve(bvp1, RadauIIa2(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, RadauIIa3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, RadauIIa5(nl_solve, jac_alg; nested); dt = 0.05)
+    @test_nowarn solve(bvp1, RadauIIa7(nl_solve, jac_alg; nested); dt = 0.05)
+end
+
+@testitem "Interpolation" begin
+    using LinearAlgebra
+
+    λ = 1
+    function prob_bvp_linear_analytic(u, λ, t)
+        a = 1 / sqrt(λ)
+        return [(exp(-a * t) - exp((t - 2) * a)) / (1 - exp(-2 * a)),
+            (-a * exp(-t * a) - a * exp((t - 2) * a)) / (1 - exp(-2 * a))]
+    end
+
+    function prob_bvp_linear_f!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = 1 / p * u[1]
+    end
+    function prob_bvp_linear_bc!(res, u, p, t)
+        res[1] = u[:, 1][1] - 1
+        res[2] = u[:, end][1]
+    end
+
+    prob_bvp_linear_function = ODEFunction(
+        prob_bvp_linear_f!, analytic = prob_bvp_linear_analytic)
+    prob_bvp_linear_tspan = (0.0, 1.0)
+    prob_bvp_linear = BVProblem(
+        prob_bvp_linear_function, prob_bvp_linear_bc!, [1.0, 0.0], prob_bvp_linear_tspan, λ)
+
+    testTol = 1e-6
+    nested = false
+
+    for stage in (2, 3, 5, 7)
+        s = Symbol("RadauIIa$(stage)")
+        @eval radau_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    for stage in (3, 4, 5)
+        s = Symbol("LobattoIIIa$(stage)")
+        @eval lobattoIIIa_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    for stage in (3, 4, 5)
+        s = Symbol("LobattoIIIb$(stage)")
+        @eval lobattoIIIb_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    for stage in (3, 4, 5)
+        s = Symbol("LobattoIIIc$(stage)")
+        @eval lobattoIIIc_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    @testset "Radau interpolations" begin
+        @testset "RadauIIa$stage" for stage in (2, 3, 5, 7)
+            @time sol = solve(prob_bvp_linear, radau_solver(Val(stage)); dt = 0.001)
+            @test sol(0.001)≈[0.998687464, -1.312035941] atol=testTol
+            @test sol(0.001; idxs = [1, 2])≈[0.998687464, -1.312035941] atol=testTol
+            @test sol(0.001; idxs = 1)≈0.998687464 atol=testTol
+            @test sol(0.001; idxs = 2)≈-1.312035941 atol=testTol
+        end
+    end
+
+    @testset "LobattoIII interpolations" begin
+        for (id, lobatto_solver) in zip(
+            ("a", "b", "c"), (lobattoIIIa_solver, lobattoIIIb_solver, lobattoIIIc_solver))
+            begin
+                @testset "LobattoIII$(id)$stage" for stage in (3, 4, 5)
+                    @time sol = solve(
+                        prob_bvp_linear, lobatto_solver(Val(stage)); dt = 0.001)
+                    @test sol(0.001)≈[0.998687464, -1.312035941] atol=testTol
+                    @test sol(0.001; idxs = [1, 2])≈[0.998687464, -1.312035941] atol=testTol
+                    @test sol(0.001; idxs = 1)≈0.998687464 atol=testTol
+                    @test sol(0.001; idxs = 2)≈-1.312035941 atol=testTol
+                end
+            end
+        end
+    end
+end
+
+@testitem "Swirling Flow III" begin
+    # Reported in https://github.com/SciML/BoundaryValueDiffEq.jl/issues/153
+    eps = 0.01
+    function swirling_flow!(du, u, p, t)
+        eps = p
+        du[1] = u[2]
+        du[2] = (u[1] * u[4] - u[3] * u[2]) / eps
+        du[3] = u[4]
+        du[4] = u[5]
+        du[5] = u[6]
+        du[6] = (-u[3] * u[6] - u[1] * u[2]) / eps
+        return
+    end
+
+    function swirling_flow_bc!(res, u, p, t)
+        res[1] = u[:, 1][1] + 1.0
+        res[2] = u[:, 1][3]
+        res[3] = u[:, 1][4]
+        res[4] = u[:, end][1] - 1.0
+        res[5] = u[:, end][3]
+        res[6] = u[:, end][4]
+        return
+    end
+
+    tspan = (0.0, 1.0)
+    u0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+    prob = BVProblem(swirling_flow!, swirling_flow_bc!, u0, tspan, eps)
+
+    @test_nowarn solve(prob, RadauIIa5(); dt = 0.01)
+end
+
+@testitem "Solve using Continuation" begin
+    using RecursiveArrayTools
+
+    g = 9.81
+    L = 1.0
+    tspan = (0.0, pi / 2)
+    function simplependulum!(du, u, p, t)
+        θ = u[1]
+        dθ = u[2]
+        du[1] = dθ
+        du[2] = -(g / L) * sin(θ)
+    end
+
+    function bc2a!(resid_a, u_a, p) # u_a is at the beginning of the time span
+        x0 = p
+        resid_a[1] = u_a[1] - x0 # the solution at the beginning of the time span should be -pi/2
+    end
+    function bc2b!(resid_b, u_b, p) # u_b is at the ending of the time span
+        x0 = p
+        resid_b[1] = u_b[1] - pi / 2 # the solution at the end of the time span should be pi/2
+    end
+
+    bvp3 = TwoPointBVProblem(
+        simplependulum!, (bc2a!, bc2b!), [pi / 2, pi / 2], (pi / 4, pi / 2),
+        -pi / 2; bcresid_prototype = (zeros(1), zeros(1)))
+    sol3 = solve(bvp3, RadauIIa5(), dt = 0.05)
+
+    # Needs a SciMLBase fix
+    bvp4 = TwoPointBVProblem(simplependulum!, (bc2a!, bc2b!), sol3, (0, pi / 2),
+        pi / 2; bcresid_prototype = (zeros(1), zeros(1)))
+    @test_broken solve(bvp4, RadauIIa5(), dt = 0.05) isa SciMLBase.ODESolution
+
+    bvp5 = TwoPointBVProblem(simplependulum!, (bc2a!, bc2b!), DiffEqArray(sol3.u, sol3.t),
+        (0, pi / 2), pi / 2; bcresid_prototype = (zeros(1), zeros(1)))
+    @test SciMLBase.successful_retcode(solve(bvp5, RadauIIa5(), dt = 0.05).retcode)
+end
diff --git a/test/firk/expanded/nlls_tests.jl b/test/firk/expanded/nlls_tests.jl
new file mode 100644
index 00000000..bca1e822
--- /dev/null
+++ b/test/firk/expanded/nlls_tests.jl
@@ -0,0 +1,192 @@
+@testsetup module FIRKExpandedNLLSTests
+
+using BoundaryValueDiffEq, LinearAlgebra
+
+SOLVERS = [firk(; nlsolve)
+           for firk in (RadauIIa5, LobattoIIIa4, LobattoIIIb4, LobattoIIIc4),
+nlsolve in (LevenbergMarquardt(), GaussNewton(), TrustRegion())]
+
+SOLVERS_NAMES = ["$solver with $nlsolve"
+                 for solver in ["RadauIIa5", "LobattoIIIa4", "LobattoIIIb4", "LobattoIIIc4"],
+nlsolve in ["LevenbergMarquardt", "GaussNewton", "TrustRegion"]]
+
+### Overconstrained BVP ###
+
+# OOP MP-BVP
+f1(u, p, t) = [u[2], -u[1]]
+function bc1(sol, p, t)
+    solₜ₁ = sol[:, 1]
+    solₜ₂ = sol[:, end]
+    return [solₜ₁[1], solₜ₂[1] - 1, solₜ₂[2] + 1.729109]
+end
+
+# IIP MP-BVP
+function f1!(du, u, p, t)
+    du[1] = u[2]
+    du[2] = -u[1]
+    return nothing
+end
+
+function bc1!(resid, sol, p, t)
+    solₜ₁ = sol[:, 1]
+    solₜ₂ = sol[:, end]
+    # We know that this overconstrained system has a solution
+    resid[1] = solₜ₁[1]
+    resid[2] = solₜ₂[1] - 1
+    resid[3] = solₜ₂[2] + 1.729109
+    return nothing
+end
+
+# OOP TP-BVP
+bc1a(ua, p) = [ua[1]]
+bc1b(ub, p) = [ub[1] - 1, ub[2] + 1.729109]
+
+# IIP TP-BVP
+bc1a!(resid, ua, p) = (resid[1] = ua[1])
+bc1b!(resid, ub, p) = (resid[1] = ub[1] - 1; resid[2] = ub[2] + 1.729109)
+
+tspan = (0.0, 100.0)
+u0 = [0.0, 1.0]
+
+OverconstrainedProbArr = [
+    BVProblem(BVPFunction{false}(f1, bc1; bcresid_prototype = zeros(3)), u0, tspan),
+    BVProblem(BVPFunction{true}(f1!, bc1!; bcresid_prototype = zeros(3)), u0, tspan),
+    TwoPointBVProblem(
+        BVPFunction{false}(f1, (bc1a, bc1b); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))),
+        u0,
+        tspan),
+    TwoPointBVProblem(
+        BVPFunction{true}(f1!, (bc1a!, bc1b!); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))),
+        u0,
+        tspan)]
+
+### Underconstrained BVP ###
+
+function hat(y)
+    return [0 -y[3] y[2]
+            y[3] 0 -y[1]
+            -y[2] y[1] 0]
+end
+
+function inv_hat(skew)
+    [skew[3, 2]; skew[1, 3]; skew[2, 1]]
+end
+
+function rod_ode!(dy, y, p, t, Kse_inv, Kbt_inv, rho, A, g)
+    R = reshape(@view(y[4:12]), 3, 3)
+    n = @view y[13:15]
+    m = @view y[16:18]
+
+    v = Kse_inv * R' * n
+    v[3] += 1.0
+    u = Kbt_inv * R' * m
+    ps = R * v
+    @views dy[1:3] .= ps
+    @views dy[4:12] .= vec(R * hat(u))
+    @views dy[13:15] .= -rho * A * g
+    @views dy[16:18] .= -hat(ps) * n
+end
+
+function bc_a!(residual, y, p)
+    # Extract rotations from y
+    R0_u = reshape(@view(y[4:12]), 3, 3)
+
+    # Extract rotations from p
+    R0 = reshape(@view(p[4:12]), 3, 3)
+
+    @views residual[1:3] = y[1:3] .- p[1:3]
+    @views residual[4:6] = inv_hat(R0_u' * R0 - R0_u * R0')
+    return nothing
+end
+
+function bc_b!(residual, y, p)
+    # Extract rotations from y
+    RL_u = reshape(@view(y[4:12]), 3, 3)
+
+    # Extract rotations from p
+    RL = reshape(@view(p[16:24]), 3, 3)
+
+    @views residual[1:3] = y[1:3] .- p[13:15]
+    @views residual[4:6] = inv_hat(RL_u' * RL - RL_u * RL')
+    return nothing
+end
+
+function bc!(residual, sol, p, t)
+    y1 = first(sol)
+    y2 = last(sol)
+    R0_u = reshape(@view(y1[4:12]), 3, 3)
+    RL_u = reshape(@view(y2[4:12]), 3, 3)
+
+    # Extract rotations from p
+    R0 = reshape(@view(p[4:12]), 3, 3)
+    RL = reshape(@view(p[16:24]), 3, 3)
+
+    @views residual[1:3] = y1[1:3] .- p[1:3]
+    @views residual[4:6] = inv_hat(R0_u' * R0 - R0_u * R0')
+    @views residual[7:9] = y2[1:3] .- p[13:15]
+    @views residual[10:12] = inv_hat(RL_u' * RL - RL_u * RL')
+
+    return nothing
+end
+
+# Parameters
+E = 200e9
+G = 80e9
+r = 0.001
+rho = 8000
+g = [9.81; 0; 0]
+L = 0.5
+A = pi * r^2
+I = pi * r^4 / 4
+J = 2 * I
+Kse = diagm([G * A, G * A, E * A])
+Kbt = diagm([E * I, E * I, G * J])
+
+# Boundary Conditions
+p0 = [0; 0; 0]
+R0 = vec(LinearAlgebra.I(3))
+pL = [0; -0.1 * L; 0.8 * L]
+RL = vec(LinearAlgebra.I(3))
+
+# Main Simulation
+rod_tspan = (0.0, L)
+rod_ode!(dy, y, p, t) = rod_ode!(dy, y, p, t, inv(Kse), inv(Kbt), rho, A, g)
+y0 = vcat(p0, R0, zeros(6))
+p = vcat(p0, R0, pL, RL)
+UnderconstrainedProbArr = [
+    TwoPointBVProblem(rod_ode!, (bc_a!, bc_b!), y0, rod_tspan, p,
+        bcresid_prototype = (zeros(6), zeros(6))),
+    BVProblem(BVPFunction(rod_ode!, bc!; bcresid_prototype = zeros(12)), y0, rod_tspan, p)]
+
+export OverconstrainedProbArr, UnderconstrainedProbArr, SOLVERS, SOLVERS_NAMES, bc1
+
+end
+
+@testitem "Overconstrained BVP" setup=[FIRKExpandedNLLSTests] begin
+    using LinearAlgebra
+
+    @testset "Problem: $i" for i in 1:4
+        prob = OverconstrainedProbArr[i]
+        @testset "Solver: $name" for (name, solver) in zip(SOLVERS_NAMES, SOLVERS)
+            sol = solve(prob, solver; verbose = false, dt = 1.0)
+            @test norm(bc1(sol, nothing, sol.t), Inf) < 1e-2
+        end
+    end
+end
+
+# This is not a very meaningful problem, but it tests that our solvers are not throwing an
+# error
+@testitem "Underconstrained BVP" setup=[FIRKExpandedNLLSTests] begin
+    using LinearAlgebra, SciMLBase
+
+    @testset "Problem: $i" for i in 1:2
+        prob = UnderconstrainedProbArr[i]
+        @testset "Solver: $name" for (name, solver) in zip(SOLVERS_NAMES, SOLVERS)
+            sol = solve(
+                prob, solver; verbose = false, dt = 0.1, abstol = 1e-1, reltol = 1e-1)
+            @test SciMLBase.successful_retcode(sol.retcode)
+        end
+    end
+end
diff --git a/test/firk/expanded/vectorofvector_initials_tests.jl b/test/firk/expanded/vectorofvector_initials_tests.jl
new file mode 100644
index 00000000..a0ca74ba
--- /dev/null
+++ b/test/firk/expanded/vectorofvector_initials_tests.jl
@@ -0,0 +1,71 @@
+@testitem "VectorOfVector Initial Condition" begin
+    #System Constants
+    ss = 1 #excitatory parameter
+    sj = 0 #inhibitory parameter
+    glb = 0.05
+    el = -70
+    gnab = 3
+    ena = 50
+    gkb = 5
+    ek = -90
+    gtb = 2
+    et = 90
+    gex = 0.1
+    vex = 0
+    gsyn = 0.13
+    vsyn = -85
+    iext = 0.41
+    eps = 1
+    qht = 2.5
+
+    #System functions
+    function f(v, h, r)
+        -(glb * (v - el) +
+          gnab * (1 / (1 + exp(-(v + 37) / 7)))^3 * h * (v - ena) +
+          gkb * (0.75 * (1 - h))^4 * (v - ek) +
+          gtb * (1 / (1 + exp(-(v + 60) / 6.2)))^2 * r * (v - et)) - gex * ss * (v - vex) -
+        gsyn * sj * (v - vsyn) + iext
+    end
+
+    function g(v, h)
+        eps * ((1 / (1 + exp((v + 41) / 4))) - h) /
+        (1 / ((0.128 * exp(-(v + 46) / 18)) + (4 / (1 + exp(-(v + 23) / 5)))))
+    end
+
+    function k(v, r)
+        qht * ((1 / (1 + exp((v + 84) / 4))) - r) / ((28 + exp(-(v + 25) / 10.5)))
+    end
+
+    #Dynamical System
+    function TC!(du, u, p, t)
+        v, h, r = u
+
+        du[1] = dv = f(v, h, r)
+        du[2] = dh = g(v, h)
+        du[3] = dr = k(v, r)
+    end
+
+    #Finding initial guesses by forward integration
+    T = 7.588145762136627 #orbit length
+    u0 = [-40.296570996984855, 0.7298857398191566, 0.0011680534089275774]
+    tspan = (0.0, T)
+    prob = ODEProblem(TC!, u0, tspan, dt = 0.01)
+    sol = solve(prob, Rodas4P(), reltol = 1e-12, abstol = 1e-12, saveat = 0.5)
+
+    # The BVP set up
+    # This is not really kind of Two-Point BVP we support.
+    function bc_po!(residual, u, p, t)
+        residual[1] = u[:, 1][1] - u[:, end][1]
+        residual[2] = u[:, 1][2] - u[:, end][2]
+        residual[3] = u[:, 1][3] - u[:, end][3]
+    end
+
+    #This is the part of the code that has problems
+    bvp1 = BVProblem(TC!, bc_po!, sol.u, tspan)
+    sol5 = solve(bvp1, RadauIIa5(); dt = 0.5)
+    @test SciMLBase.successful_retcode(sol5.retcode)
+
+    bvp1 = BVProblem(TC!, bc_po!, zero(first(sol.u)), tspan)
+    sol5 = solve(bvp1, RadauIIa5(); dt = 0.1, abstol = 1e-14)
+    @test SciMLBase.successful_retcode(sol5.retcode)
+end
diff --git a/test/firk/nested/ensemble_tests.jl b/test/firk/nested/ensemble_tests.jl
new file mode 100644
index 00000000..f0efc026
--- /dev/null
+++ b/test/firk/nested/ensemble_tests.jl
@@ -0,0 +1,72 @@
+@testitem "EnsembleProblem" begin
+    using Random
+
+    function ode!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -p[1] * u[1]
+    end
+
+    function bc!(residual, u, p, t)
+        residual[1] = u[:, 1][1] - 1.0
+        residual[2] = u[:, end][1]
+    end
+
+    prob_func(prob, i, repeat) = remake(prob, p = [rand()])
+
+    u0 = [0.0, 1.0]
+    tspan = (0, pi / 2)
+    p = [rand()]
+    bvp = BVProblem(ode!, bc!, u0, tspan, p)
+    ensemble_prob = EnsembleProblem(bvp; prob_func)
+    nlsolve = NewtonRaphson()
+    nested = true
+
+    @testset "$(solver)" for solver in (RadauIIa2, RadauIIa3, RadauIIa5, RadauIIa7) # RadauIIa1 doesn't have adaptivity
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+
+    @testset "$(solver)" for solver in (
+        LobattoIIIa2, LobattoIIIa3, LobattoIIIa4, LobattoIIIa5)
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+
+    @testset "$(solver)" for solver in (LobattoIIIb3, LobattoIIIb4, LobattoIIIb5) # LobattoIIIb2 doesn't have adaptivity
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+
+    @testset "$(solver)" for solver in (LobattoIIIc3, LobattoIIIc4, LobattoIIIc5) # LobattoIIIc2 doesn't have adaptivity
+        jac_algs = [BVPJacobianAlgorithm(),
+            BVPJacobianAlgorithm(
+                AutoSparse(AutoFiniteDiff()); bc_diffmode = AutoFiniteDiff(),
+                nonbc_diffmode = AutoSparse(AutoFiniteDiff()))]
+        for jac_alg in jac_algs
+            sol = solve(ensemble_prob, solver(nlsolve, jac_alg; nested);
+                trajectories = 10, dt = 0.1)
+            @test sol.converged
+        end
+    end
+end
diff --git a/test/firk/nested/firk_basic_tests.jl b/test/firk/nested/firk_basic_tests.jl
new file mode 100644
index 00000000..0a54ec45
--- /dev/null
+++ b/test/firk/nested/firk_basic_tests.jl
@@ -0,0 +1,436 @@
+@testsetup module FIRKNestedConvergenceTests
+
+using BoundaryValueDiffEq
+
+nested = true
+
+for stage in (2, 3, 4, 5)
+    s = Symbol("LobattoIIIa$(stage)")
+    @eval lobattoIIIa_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+for stage in (2, 3, 4, 5)
+    s = Symbol("LobattoIIIb$(stage)")
+    @eval lobattoIIIb_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+for stage in (2, 3, 4, 5)
+    s = Symbol("LobattoIIIc$(stage)")
+    @eval lobattoIIIc_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+for stage in (1, 2, 3, 5, 7)
+    s = Symbol("RadauIIa$(stage)")
+    @eval radau_solver(::Val{$stage}, args...; kwargs...) = $(s)(args...; kwargs...)
+end
+
+# First order test
+function f1!(du, u, p, t)
+    du[1] = u[2]
+    du[2] = 0
+end
+f1(u, p, t) = [u[2], 0]
+
+# Second order linear test
+function f2!(du, u, p, t)
+    du[1] = u[2]
+    du[2] = -u[1]
+end
+f2(u, p, t) = [u[2], -u[1]]
+
+function boundary!(residual, u, p, t)
+    residual[1] = u[:, 1][1] - 5
+    residual[2] = u[:, end][1]
+end
+boundary(u, p, t) = [u[:, 1][1] - 5, u[:, end][1]]
+
+function boundary_two_point_a!(resida, ua, p)
+    resida[1] = ua[1] - 5
+end
+function boundary_two_point_b!(residb, ub, p)
+    residb[1] = ub[1]
+end
+
+boundary_two_point_a(ua, p) = [ua[1] - 5]
+boundary_two_point_b(ub, p) = [ub[1]]
+
+# Not able to change the initial condition.
+# Hard coded solution.
+odef1! = ODEFunction(f1!, analytic = (u0, p, t) -> [5 - t, -1])
+odef1 = ODEFunction(f1, analytic = (u0, p, t) -> [5 - t, -1])
+
+odef2! = ODEFunction(f2!,
+    analytic = (u0, p, t) -> [
+        5 * (cos(t) - cot(5) * sin(t)), 5 * (-cos(t) * cot(5) - sin(t))])
+odef2 = ODEFunction(f2,
+    analytic = (u0, p, t) -> [
+        5 * (cos(t) - cot(5) * sin(t)), 5 * (-cos(t) * cot(5) - sin(t))])
+
+bcresid_prototype = (Array{Float64}(undef, 1), Array{Float64}(undef, 1))
+
+tspan = (0.0, 5.0)
+u0 = [5.0, -3.5]
+
+probArr = [BVProblem(odef1!, boundary!, u0, tspan, nlls = Val(false)),
+    BVProblem(odef1, boundary, u0, tspan, nlls = Val(false)),
+    BVProblem(odef2!, boundary!, u0, tspan, nlls = Val(false)),
+    BVProblem(odef2, boundary, u0, tspan, nlls = Val(false)),
+    TwoPointBVProblem(odef1!, (boundary_two_point_a!, boundary_two_point_b!),
+        u0, tspan; bcresid_prototype, nlls = Val(false)),
+    TwoPointBVProblem(odef1, (boundary_two_point_a, boundary_two_point_b),
+        u0, tspan; bcresid_prototype, nlls = Val(false)),
+    TwoPointBVProblem(odef2!, (boundary_two_point_a!, boundary_two_point_b!),
+        u0, tspan; bcresid_prototype, nlls = Val(false)),
+    TwoPointBVProblem(odef2, (boundary_two_point_a, boundary_two_point_b),
+        u0, tspan; bcresid_prototype, nlls = Val(false))]
+
+testTol = 0.2
+affineTol = 1e-2
+dts = 1 .// 2 .^ (5:-1:3)
+
+export probArr, testTol, affineTol, dts, lobattoIIIa_solver, lobattoIIIb_solver,
+       lobattoIIIc_solver, radau_solver, nested
+
+end
+
+@testitem "Affineness" setup=[FIRKNestedConvergenceTests] begin
+    using LinearAlgebra
+
+    @testset "Problem: $i" for i in (1, 2, 5, 6)
+        prob = probArr[i]
+
+        @testset "LobattoIIIa$stage" for stage in (2, 3, 4, 5)
+            @time sol = solve(
+                prob, lobattoIIIa_solver(Val(stage); nested_nlsolve = nested); dt = 0.2)
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+        @testset "LobattoIIIb$stage" for stage in (2, 3, 4, 5)
+            @time if stage == 2 # LobattoIIIb2 doesn't support adaptivity
+                sol = solve(prob, lobattoIIIb_solver(Val(stage); nested_nlsolve = nested);
+                    dt = 0.2, adaptive = false)
+            else
+                sol = solve(
+                    prob, lobattoIIIb_solver(Val(stage); nested_nlsolve = nested); dt = 0.2)
+            end
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+        @testset "LobattoIIIc$stage" for stage in (2, 3, 4, 5)
+            @time if stage == 2 # LobattoIIIc2 doesn't support adaptivity
+                sol = solve(prob, lobattoIIIc_solver(Val(stage); nested_nlsolve = nested);
+                    dt = 0.2, adaptive = false)
+            else
+                sol = solve(
+                    prob, lobattoIIIc_solver(Val(stage); nested_nlsolve = nested); dt = 0.2)
+            end
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+
+        @testset "RadauIIa$stage" for stage in (1, 2, 3, 5, 7)
+            @time if stage == 1
+                sol = solve(prob, radau_solver(Val(stage); nested_nlsolve = nested);
+                    dt = 0.2, adaptive = false)
+            else
+                sol = solve(
+                    prob, radau_solver(Val(stage); nested_nlsolve = nested); dt = 0.2)
+            end
+            @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol.u[1][1] - 5) < affineTol
+        end
+    end
+end
+
+@testitem "JET: Runtime Dispatches" setup=[FIRKNestedConvergenceTests] begin
+    using JET
+
+    @testset "Problem: $i" for i in 1:8
+        prob = probArr[i]
+        @testset "LobattoIIIa$stage" for stage in (2, 3, 4, 5)
+            solver = lobattoIIIa_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoFiniteDiff()), nested_nlsolve = nested)
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2) # This fails because init_expanded fails
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+        @testset "LobattoIIIb$stage" for stage in (2, 3, 4, 5)
+            solver = lobattoIIIb_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoFiniteDiff()), nested_nlsolve = nested)
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2) # This fails because init_expanded fails
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+        @testset "LobattoIIIc$stage" for stage in (2, 3, 4, 5)
+            solver = lobattoIIIc_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoFiniteDiff()), nested_nlsolve = nested)
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2) # This fails because init_expanded fails
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+        @testset "RadauIIa$stage" for stage in (2, 3, 5, 7)
+            solver = radau_solver(Val(stage); nlsolve = NewtonRaphson(),
+                jac_alg = BVPJacobianAlgorithm(AutoFiniteDiff()), nested_nlsolve = nested)
+            @test_opt broken=true target_modules=(BoundaryValueDiffEq,) solve(
+                prob, solver; dt = 0.2) # This fails because init_expanded fails
+            @test_call target_modules=(BoundaryValueDiffEq,) solve(prob, solver; dt = 0.2)
+        end
+    end
+end
+
+@testitem "Convergence on Linear" setup=[FIRKNestedConvergenceTests] begin
+    using LinearAlgebra, DiffEqDevTools
+
+    @testset "Problem: $i" for i in (3, 4, 7, 8)
+        prob = probArr[i]
+
+        @testset "LobattoIIIa$stage" for stage in (2, 3, 4, 5)
+            @time sim = test_convergence(
+                dts, prob, lobattoIIIa_solver(Val(stage); nested_nlsolve = nested);
+                abstol = 1e-8, reltol = 1e-8)
+            if (stage == 4 && ((i == 7) || (i == 8)))
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            elseif first(sim.errors[:final]) < 1e-12
+                @test_broken sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            end
+        end
+
+        @testset "LobattoIIIb$stage" for stage in (2, 3, 4, 5)
+            @time sim = test_convergence(
+                dts, prob, lobattoIIIb_solver(Val(stage); nested_nlsolve = nested);
+                abstol = 1e-8, reltol = 1e-8)
+            if (stage == 4 && ((i == 7) || (i == 8)))
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            elseif first(sim.errors[:final]) < 1e-12
+                @test_broken sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            end
+        end
+
+        @testset "LobattoIIIc$stage" for stage in (2, 3, 4, 5)
+            @time sim = test_convergence(
+                dts, prob, lobattoIIIc_solver(Val(stage); nested_nlsolve = nested);
+                abstol = 1e-8, reltol = 1e-8)
+            if stage == 5 || ((stage == 4) && (i == 3 || i == 4))
+                @test_broken sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 2 atol=testTol
+            end
+        end
+
+        @testset "RadauIIa$stage" for stage in (2, 3, 5, 7)
+            @time sim = test_convergence(
+                dts, prob, radau_solver(Val(stage); nested_nlsolve = nested);
+                abstol = 1e-8, reltol = 1e-8)
+            if (stage == 5) || (stage == 7)
+                @test_broken sim.𝒪est[:final]≈2 * stage - 1 atol=testTol
+            elseif first(sim.errors[:final]) < 1e-12
+                @test_broken sim.𝒪est[:final]≈2 * stage - 1 atol=testTol
+            else
+                @test sim.𝒪est[:final]≈2 * stage - 1 atol=testTol
+            end
+        end
+    end
+end
+
+# FIXME: This is a really bad test. Needs interpolation
+@testitem "Simple Pendulum" begin
+    using StaticArrays
+
+    tspan = (0.0, π / 2)
+    function simplependulum!(du, u, p, t)
+        g, L, θ, dθ = 9.81, 1.0, u[1], u[2]
+        du[1] = dθ
+        du[2] = -(g / L) * sin(θ)
+    end
+
+    function bc_pendulum!(residual, u, p, t)
+        residual[1] = u[:, end ÷ 2][1] + π / 2 # the solution at the middle of the time span should be -pi/2
+        residual[2] = u[:, end][1] - π / 2 # the solution at the end of the time span should be pi/2
+    end
+
+    u0 = MVector{2}([pi / 2, pi / 2])
+    bvp1 = BVProblem(simplependulum!, bc_pendulum!, u0, tspan)
+
+    jac_alg = BVPJacobianAlgorithm(;
+        bc_diffmode = AutoFiniteDiff(), nonbc_diffmode = AutoSparse(AutoFiniteDiff()))
+    nl_solve = NewtonRaphson()
+    nested = true
+
+    # Using ForwardDiff might lead to Cache expansion warnings
+    @test_nowarn solve(bvp1, LobattoIIIa2(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIa3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIa4(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIa5(nl_solve, jac_alg; nested); dt = 0.005)
+
+    @test_nowarn solve(
+        bvp1, LobattoIIIb2(nl_solve, jac_alg; nested); dt = 0.005, adaptive = false)
+    @test_nowarn solve(bvp1, LobattoIIIb3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIb4(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIb5(nl_solve, jac_alg; nested); dt = 0.005)
+
+    @test_nowarn solve(
+        bvp1, LobattoIIIc2(nl_solve, jac_alg; nested); dt = 0.005, adaptive = false)
+    @test_nowarn solve(bvp1, LobattoIIIc3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIc4(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, LobattoIIIc5(nl_solve, jac_alg; nested); dt = 0.005)
+
+    @test_nowarn solve(
+        bvp1, RadauIIa1(nl_solve, jac_alg; nested); dt = 0.005, adaptive = false)
+    @test_nowarn solve(bvp1, RadauIIa2(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, RadauIIa3(nl_solve, jac_alg; nested); dt = 0.005)
+    @test_nowarn solve(bvp1, RadauIIa5(nl_solve, jac_alg; nested); dt = 0.05)
+    @test_nowarn solve(bvp1, RadauIIa7(nl_solve, jac_alg; nested); dt = 0.05)
+end
+
+@testitem "Interpolation" begin
+    using LinearAlgebra
+
+    λ = 1
+    function prob_bvp_linear_analytic(u, λ, t)
+        a = 1 / sqrt(λ)
+        return [(exp(-a * t) - exp((t - 2) * a)) / (1 - exp(-2 * a)),
+            (-a * exp(-t * a) - a * exp((t - 2) * a)) / (1 - exp(-2 * a))]
+    end
+
+    function prob_bvp_linear_f!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = 1 / p * u[1]
+    end
+    function prob_bvp_linear_bc!(res, u, p, t)
+        res[1] = u[:, 1][1] - 1
+        res[2] = u[:, end][1]
+    end
+
+    prob_bvp_linear_function = ODEFunction(
+        prob_bvp_linear_f!, analytic = prob_bvp_linear_analytic)
+    prob_bvp_linear_tspan = (0.0, 1.0)
+    prob_bvp_linear = BVProblem(
+        prob_bvp_linear_function, prob_bvp_linear_bc!, [1.0, 0.0], prob_bvp_linear_tspan, λ)
+
+    testTol = 1e-6
+    nested = true
+
+    for stage in (2, 3, 5, 7)
+        s = Symbol("RadauIIa$(stage)")
+        @eval radau_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    for stage in (3, 4, 5)
+        s = Symbol("LobattoIIIa$(stage)")
+        @eval lobattoIIIa_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    for stage in (3, 4, 5)
+        s = Symbol("LobattoIIIb$(stage)")
+        @eval lobattoIIIb_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    for stage in (3, 4, 5)
+        s = Symbol("LobattoIIIc$(stage)")
+        @eval lobattoIIIc_solver(::Val{$stage}) = $(s)(
+            NewtonRaphson(), BVPJacobianAlgorithm(); nested)
+    end
+
+    @testset "Radau interpolations" begin
+        @testset "RadauIIa$stage" for stage in (2, 3, 5, 7)
+            @time sol = solve(prob_bvp_linear, radau_solver(Val(stage)); dt = 0.001)
+            sol_analytic = prob_bvp_linear_analytic(nothing, λ, 0.001)
+
+            @test sol(0.001)≈sol_analytic atol=testTol
+            @test sol(0.001; idxs = [1, 2])≈sol_analytic atol=testTol
+            @test sol(0.001; idxs = 1)≈sol_analytic[1] atol=testTol
+            @test sol(0.001; idxs = 2)≈sol_analytic[2] atol=testTol
+        end
+    end
+
+    @testset "LobattoIII interpolations" begin
+        for (id, lobatto_solver) in zip(
+            ("a", "b", "c"), (lobattoIIIa_solver, lobattoIIIb_solver, lobattoIIIc_solver))
+            begin
+                @testset "LobattoIII$(id)$stage" for stage in (3, 4, 5)
+                    @time sol = solve(
+                        prob_bvp_linear, lobatto_solver(Val(stage)); dt = 0.001)
+                    sol_analytic = prob_bvp_linear_analytic(nothing, λ, 0.001)
+                    @test sol(0.001)≈sol_analytic atol=testTol
+                    @test sol(0.001; idxs = [1, 2])≈sol_analytic atol=testTol
+                    @test sol(0.001; idxs = 1)≈sol_analytic[1] atol=testTol
+                    @test sol(0.001; idxs = 2)≈sol_analytic[2] atol=testTol
+                end
+            end
+        end
+    end
+end
+
+# Doesn't seem to work with nested solver
+#= @testitem "Swirling Flow III" begin 
+    # Reported in https://github.com/SciML/BoundaryValueDiffEq.jl/issues/153
+    eps = 0.01
+    function swirling_flow!(du, u, p, t)
+        eps = p
+        du[1] = u[2]
+        du[2] = (u[1] * u[4] - u[3] * u[2]) / eps
+        du[3] = u[4]
+        du[4] = u[5]
+        du[5] = u[6]
+        du[6] = (-u[3] * u[6] - u[1] * u[2]) / eps
+        return
+    end
+
+    function swirling_flow_bc!(res, u, p, t)
+        res[1] = u[:, 1][1] + 1.0
+        res[2] = u[:, 1][3]
+        res[3] = u[:, 1][4]
+        res[4] = u[:, end][1] - 1.0
+        res[5] = u[:, end][3]
+        res[6] = u[:, end][4]
+        return
+    end
+
+    tspan = (0.0, 1.0)
+    u0 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+    prob = BVProblem(swirling_flow!, swirling_flow_bc!, u0, tspan, eps)
+
+    @test_nowarn solve(prob, RadauIIa5(nested_nlsolve = true); dt = 0.01)
+end =#
+
+@testitem "Solve using Continuation" begin
+    using RecursiveArrayTools
+
+    g = 9.81
+    L = 1.0
+    tspan = (0.0, pi / 2)
+    function simplependulum!(du, u, p, t)
+        θ = u[1]
+        dθ = u[2]
+        du[1] = dθ
+        du[2] = -(g / L) * sin(θ)
+    end
+
+    function bc2a!(resid_a, u_a, p) # u_a is at the beginning of the time span
+        x0 = p
+        resid_a[1] = u_a[1] - x0 # the solution at the beginning of the time span should be -pi/2
+    end
+    function bc2b!(resid_b, u_b, p) # u_b is at the ending of the time span
+        x0 = p
+        resid_b[1] = u_b[1] - pi / 2 # the solution at the end of the time span should be pi/2
+    end
+
+    bvp3 = TwoPointBVProblem(
+        simplependulum!, (bc2a!, bc2b!), [pi / 2, pi / 2], (pi / 4, pi / 2),
+        -pi / 2; bcresid_prototype = (zeros(1), zeros(1)))
+    sol3 = solve(bvp3, RadauIIa5(; nested_nlsolve = true), dt = 0.05)
+
+    # Needs a SciMLBase fix
+    bvp4 = TwoPointBVProblem(simplependulum!, (bc2a!, bc2b!), sol3, (0, pi / 2),
+        pi / 2; bcresid_prototype = (zeros(1), zeros(1)))
+    @test_broken solve(bvp4, RadauIIa5(; nested_nlsolve = true), dt = 0.05) isa
+                 SciMLBase.ODESolution
+
+    bvp5 = TwoPointBVProblem(simplependulum!, (bc2a!, bc2b!), DiffEqArray(sol3.u, sol3.t),
+        (0, pi / 2), pi / 2; bcresid_prototype = (zeros(1), zeros(1)))
+    @test_broken SciMLBase.successful_retcode(solve(
+        bvp5, RadauIIa5(; nested_nlsolve = true), dt = 0.05).retcode)
+end
diff --git a/test/firk/nested/nlls_tests.jl b/test/firk/nested/nlls_tests.jl
new file mode 100644
index 00000000..1c3c8683
--- /dev/null
+++ b/test/firk/nested/nlls_tests.jl
@@ -0,0 +1,196 @@
+@testsetup module FIRKNestedNLLSTests
+
+using BoundaryValueDiffEq, LinearAlgebra
+
+SOLVERS = [firk(; nlsolve, nested_nlsolve = true)
+           for firk in (RadauIIa5, LobattoIIIa4, LobattoIIIb4, LobattoIIIc4),
+nlsolve in (LevenbergMarquardt(), GaussNewton(), TrustRegion())]
+
+SOLVERS_NAMES = ["$solver with $nlsolve"
+                 for solver in ["RadauIIa5", "LobattoIIIa4", "LobattoIIIb4", "LobattoIIIc4"],
+nlsolve in ["LevenbergMarquardt", "GaussNewton", "TrustRegion"]]
+
+### Overconstrained BVP ###
+
+# OOP MP-BVP
+f1(u, p, t) = [u[2], -u[1]]
+function bc1(sol, p, t)
+    solₜ₁ = sol[:, 1]
+    solₜ₂ = sol[:, end]
+    return [solₜ₁[1], solₜ₂[1] - 1, solₜ₂[2] + 1.729109]
+end
+
+# IIP MP-BVP
+function f1!(du, u, p, t)
+    du[1] = u[2]
+    du[2] = -u[1]
+    return nothing
+end
+
+function bc1!(resid, sol, p, t)
+    solₜ₁ = sol[:, 1]
+    solₜ₂ = sol[:, end]
+    # We know that this overconstrained system has a solution
+    resid[1] = solₜ₁[1]
+    resid[2] = solₜ₂[1] - 1
+    resid[3] = solₜ₂[2] + 1.729109
+    return nothing
+end
+
+# OOP TP-BVP
+bc1a(ua, p) = [ua[1]]
+bc1b(ub, p) = [ub[1] - 1, ub[2] + 1.729109]
+
+# IIP TP-BVP
+bc1a!(resid, ua, p) = (resid[1] = ua[1])
+bc1b!(resid, ub, p) = (resid[1] = ub[1] - 1; resid[2] = ub[2] + 1.729109)
+
+tspan = (0.0, 100.0)
+u0 = [0.0, 1.0]
+
+OverconstrainedProbArr = [
+    BVProblem(BVPFunction{false}(f1, bc1; bcresid_prototype = zeros(3)), u0, tspan),
+    BVProblem(BVPFunction{true}(f1!, bc1!; bcresid_prototype = zeros(3)), u0, tspan),
+    TwoPointBVProblem(
+        BVPFunction{false}(f1, (bc1a, bc1b); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))),
+        u0,
+        tspan),
+    TwoPointBVProblem(
+        BVPFunction{true}(f1!, (bc1a!, bc1b!); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))),
+        u0,
+        tspan)]
+
+### Underconstrained BVP ###
+
+function hat(y)
+    return [0 -y[3] y[2]
+            y[3] 0 -y[1]
+            -y[2] y[1] 0]
+end
+
+function inv_hat(skew)
+    [skew[3, 2]; skew[1, 3]; skew[2, 1]]
+end
+
+function rod_ode!(dy, y, p, t, Kse_inv, Kbt_inv, rho, A, g)
+    R = reshape(@view(y[4:12]), 3, 3)
+    n = @view y[13:15]
+    m = @view y[16:18]
+
+    v = Kse_inv * R' * n
+    v[3] += 1.0
+    u = Kbt_inv * R' * m
+    ps = R * v
+    @views dy[1:3] .= ps
+    @views dy[4:12] .= vec(R * hat(u))
+    @views dy[13:15] .= -rho * A * g
+    @views dy[16:18] .= -hat(ps) * n
+end
+
+function bc_a!(residual, y, p)
+    # Extract rotations from y
+    R0_u = reshape(@view(y[4:12]), 3, 3)
+
+    # Extract rotations from p
+    R0 = reshape(@view(p[4:12]), 3, 3)
+
+    @views residual[1:3] = y[1:3] .- p[1:3]
+    @views residual[4:6] = inv_hat(R0_u' * R0 - R0_u * R0')
+    return nothing
+end
+
+function bc_b!(residual, y, p)
+    # Extract rotations from y
+    RL_u = reshape(@view(y[4:12]), 3, 3)
+
+    # Extract rotations from p
+    RL = reshape(@view(p[16:24]), 3, 3)
+
+    @views residual[1:3] = y[1:3] .- p[13:15]
+    @views residual[4:6] = inv_hat(RL_u' * RL - RL_u * RL')
+    return nothing
+end
+
+function bc!(residual, sol, p, t)
+    y1 = first(sol)
+    y2 = last(sol)
+    R0_u = reshape(@view(y1[4:12]), 3, 3)
+    RL_u = reshape(@view(y2[4:12]), 3, 3)
+
+    # Extract rotations from p
+    R0 = reshape(@view(p[4:12]), 3, 3)
+    RL = reshape(@view(p[16:24]), 3, 3)
+
+    @views residual[1:3] = y1[1:3] .- p[1:3]
+    @views residual[4:6] = inv_hat(R0_u' * R0 - R0_u * R0')
+    @views residual[7:9] = y2[1:3] .- p[13:15]
+    @views residual[10:12] = inv_hat(RL_u' * RL - RL_u * RL')
+
+    return nothing
+end
+
+# Parameters
+E = 200e9
+G = 80e9
+r = 0.001
+rho = 8000
+g = [9.81; 0; 0]
+L = 0.5
+A = pi * r^2
+I = pi * r^4 / 4
+J = 2 * I
+Kse = diagm([G * A, G * A, E * A])
+Kbt = diagm([E * I, E * I, G * J])
+
+# Boundary Conditions
+p0 = [0; 0; 0]
+R0 = vec(LinearAlgebra.I(3))
+pL = [0; -0.1 * L; 0.8 * L]
+RL = vec(LinearAlgebra.I(3))
+
+# Main Simulation
+rod_tspan = (0.0, L)
+rod_ode!(dy, y, p, t) = rod_ode!(dy, y, p, t, inv(Kse), inv(Kbt), rho, A, g)
+y0 = vcat(p0, R0, zeros(6))
+p = vcat(p0, R0, pL, RL)
+UnderconstrainedProbArr = [
+    TwoPointBVProblem(rod_ode!, (bc_a!, bc_b!), y0, rod_tspan, p,
+        bcresid_prototype = (zeros(6), zeros(6))),
+    BVProblem(BVPFunction(rod_ode!, bc!; bcresid_prototype = zeros(12)), y0, rod_tspan, p)]
+
+export OverconstrainedProbArr, UnderconstrainedProbArr, SOLVERS, SOLVERS_NAMES, bc1
+
+end
+
+@testitem "Overconstrained BVP" setup=[FIRKNestedNLLSTests] begin
+    using LinearAlgebra
+
+    @testset "Problem: $i" for i in 1:4
+        prob = OverconstrainedProbArr[i]
+        @testset "Solver: $name" for (name, solver) in zip(SOLVERS_NAMES, SOLVERS)
+            sol = solve(prob, solver; verbose = false, dt = 1.0)
+            @test norm(bc1(sol, nothing, sol.t), Inf) < 1e-2
+        end
+    end
+end
+
+# This is not a very meaningful problem, but it tests that our solvers are not throwing an
+# error
+@testitem "Underconstrained BVP" setup=[FIRKNestedNLLSTests] begin
+    using LinearAlgebra, SciMLBase
+
+    @testset "Problem: $i" for i in 1:2
+        prob = UnderconstrainedProbArr[i]
+        @testset "Solver: $name" for (name, solver) in zip(SOLVERS_NAMES, SOLVERS)
+            if name == "RadauIIa5 with GaussNewton"
+                continue
+            else
+                sol = solve(
+                    prob, solver; verbose = false, dt = 0.1, abstol = 1e-1, reltol = 1e-1)
+                @test SciMLBase.successful_retcode(sol.retcode)
+            end
+        end
+    end
+end
diff --git a/test/firk/nested/vectorofvector_initials_tests.jl b/test/firk/nested/vectorofvector_initials_tests.jl
new file mode 100644
index 00000000..f13fc8a3
--- /dev/null
+++ b/test/firk/nested/vectorofvector_initials_tests.jl
@@ -0,0 +1,70 @@
+@testitem "VectorOfVector Initial Condition" begin
+    #System Constants
+    ss = 1 #excitatory parameter
+    sj = 0 #inhibitory parameter
+    glb = 0.05
+    el = -70
+    gnab = 3
+    ena = 50
+    gkb = 5
+    ek = -90
+    gtb = 2
+    et = 90
+    gex = 0.1
+    vex = 0
+    gsyn = 0.13
+    vsyn = -85
+    iext = 0.41
+    eps = 1
+    qht = 2.5
+
+    #System functions
+    function f(v, h, r)
+        -(glb * (v - el) +
+          gnab * (1 / (1 + exp(-(v + 37) / 7)))^3 * h * (v - ena) +
+          gkb * (0.75 * (1 - h))^4 * (v - ek) +
+          gtb * (1 / (1 + exp(-(v + 60) / 6.2)))^2 * r * (v - et)) - gex * ss * (v - vex) -
+        gsyn * sj * (v - vsyn) + iext
+    end
+
+    function g(v, h)
+        eps * ((1 / (1 + exp((v + 41) / 4))) - h) /
+        (1 / ((0.128 * exp(-(v + 46) / 18)) + (4 / (1 + exp(-(v + 23) / 5)))))
+    end
+
+    function k(v, r)
+        qht * ((1 / (1 + exp((v + 84) / 4))) - r) / ((28 + exp(-(v + 25) / 10.5)))
+    end
+
+    #Dynamical System
+    function TC!(du, u, p, t)
+        v, h, r = u
+
+        du[1] = dv = f(v, h, r)
+        du[2] = dh = g(v, h)
+        du[3] = dr = k(v, r)
+    end
+
+    #Finding initial guesses by forward integration
+    T = 7.588145762136627 #orbit length
+    u0 = [-40.296570996984855, 0.7298857398191566, 0.0011680534089275774]
+    tspan = (0.0, T)
+    prob = ODEProblem(TC!, u0, tspan, dt = 0.01)
+    sol = solve(prob, Rodas4P(), reltol = 1e-12, abstol = 1e-12, saveat = 0.5)
+
+    # The BVP set up
+    # This is not really kind of Two-Point BVP we support.
+    function bc_po!(residual, u, p, t)
+        residual[1] = u[:, 1][1] - u[:, end][1]
+        residual[2] = u[:, 1][2] - u[:, end][2]
+        residual[3] = u[:, 1][3] - u[:, end][3]
+    end
+
+    bvp1 = BVProblem(TC!, bc_po!, sol.u, tspan)
+    sol5 = solve(bvp1, RadauIIa5(; nested_nlsolve = true); dt = 0.5)
+    @test SciMLBase.successful_retcode(sol5.retcode)
+
+    bvp1 = BVProblem(TC!, bc_po!, zero(first(sol.u)), tspan)
+    sol5 = solve(bvp1, RadauIIa5(; nested_nlsolve = true); dt = 0.1, abstol = 1e-14)
+    @test SciMLBase.successful_retcode(sol5.retcode)
+end
diff --git a/test/misc/bigfloat_test.jl b/test/misc/bigfloat_test.jl
index ebf95dab..c202bf78 100644
--- a/test/misc/bigfloat_test.jl
+++ b/test/misc/bigfloat_test.jl
@@ -13,6 +13,12 @@
     end
     u0 = BigFloat.([pi / 2, pi / 2])
     prob = BVProblem(simplependulum!, bc!, u0, tspan)
-    sol = solve(prob, MIRK4(), dt = 0.05)
-    @test SciMLBase.successful_retcode(sol.retcode)
+    sol1 = solve(prob, MIRK4(), dt = 0.05)
+    @test SciMLBase.successful_retcode(sol1.retcode)
+
+    sol2 = solve(prob, RadauIIa5(), dt = 0.05)
+    @test SciMLBase.successful_retcode(sol2.retcode)
+
+    sol3 = solve(prob, LobattoIIIa4(nested_nlsolve = true), dt = 0.05)
+    @test SciMLBase.successful_retcode(sol3.retcode)
 end
diff --git a/test/misc/qa_tests.jl b/test/misc/qa_tests.jl
index 958d77a0..2a12ac3c 100644
--- a/test/misc/qa_tests.jl
+++ b/test/misc/qa_tests.jl
@@ -1,6 +1,7 @@
 @testitem "Quality Assurance" begin
     using Aqua
 
-    Aqua.test_all(BoundaryValueDiffEq; ambiguities = false)
-    @test_broken Aqua.test_ambiguities(BoundaryValueDiffEq; recursive = false)
+    # Report unrealted deprecated warnings
+    # @test_broken Aqua.test_all(BoundaryValueDiffEq; ambiguities = false)
+    # @test_broken Aqua.test_ambiguities(BoundaryValueDiffEq; recursive = false)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 17c6b08b..fb57f8c1 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,10 +1,45 @@
 using ReTestItems
 
-ReTestItems.runtests(joinpath(@__DIR__, "mirk/"))
-ReTestItems.runtests(joinpath(@__DIR__, "misc/"))
-ReTestItems.runtests(joinpath(@__DIR__, "shooting/"))
+const GROUP = get(ENV, "GROUP", "All")
+const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR")
 
-if !Sys.iswindows() && !Sys.isapple()
-    # Wrappers like ODEInterface don't support parallel testing
-    ReTestItems.runtests(joinpath(@__DIR__, "wrappers/"); nworkers = 0)
+@time begin
+    if GROUP == "All" || GROUP == "MIRK"
+        @time "MIRK solvers" begin
+            ReTestItems.runtests(joinpath(@__DIR__, "mirk/"))
+        end
+    end
+
+    if GROUP == "All" || GROUP == "MISC"
+        @time "Miscellaneous" begin
+            ReTestItems.runtests(joinpath(@__DIR__, "misc/"))
+        end
+    end
+
+    if GROUP == "All" || GROUP == "SHOOTING"
+        @time "Shooting solvers" begin
+            ReTestItems.runtests(joinpath(@__DIR__, "shooting/"))
+        end
+    end
+
+    if GROUP == "All" || GROUP == "FIRK(EXPANDED)"
+        @time "FIRK Expanded solvers" begin
+            ReTestItems.runtests(joinpath(@__DIR__, "firk/expanded/"))
+        end
+    end
+
+    if GROUP == "All" || GROUP == "FIRK(NESTED)"
+        @time "FIRK Nested solvers" begin
+            ReTestItems.runtests(joinpath(@__DIR__, "firk/nested/"))
+        end
+    end
+
+    if GROUP == "All" || GROUP == "WRAPPERS"
+        @time "WRAPPER solvers" begin
+            if !Sys.iswindows() && !Sys.isapple()
+                # Wrappers like ODEInterface don't support parallel testing
+                ReTestItems.runtests(joinpath(@__DIR__, "wrappers/"); nworkers = 0)
+            end
+        end
+    end
 end