diff --git a/.gitignore b/.gitignore
index d1d98eec..0189694e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ docs/build/
 anaconda_projects/
 modovmc
 pestotv
+Vacuum3D_temp/
diff --git a/Project.toml b/Project.toml
index af559a6c..043a74c0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -8,6 +8,7 @@ version = "0.1.0"
 DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
+FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
@@ -16,6 +17,7 @@ OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
@@ -25,6 +27,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 DiffEqCallbacks = "4.9.0"
 Documenter = "1.14.1"
 FFTW = "1.9.0"
+FastGaussQuadrature = "1"
 HDF5 = "0.17.2"
 Interpolations = "0.16.1"
 JLD2 = "0.6.3"
@@ -33,6 +36,7 @@ OrdinaryDiffEq = "6.102.0"
 Pkg = "1.11.0"
 Plots = "1.40.15"
 Printf = "1.11.0"
+SparseArrays = "1.11.0"
 SpecialFunctions = "2.5.1"
 StaticArrays = "1.9.15"
 TOML = "1.0.3"
diff --git a/benchmarks/Solovev_ideal_example/vacuum_3D_convergence_benchmark.jl b/benchmarks/Solovev_ideal_example/vacuum_3D_convergence_benchmark.jl
new file mode 100644
index 00000000..705a71ce
--- /dev/null
+++ b/benchmarks/Solovev_ideal_example/vacuum_3D_convergence_benchmark.jl
@@ -0,0 +1,407 @@
+"""
+Convergence benchmark for compute_vacuum_response_3D
+
+This script tests how the wv matrix converges with grid resolution for the Solovev
+equilibrium. It initializes the equilibrium from sol.toml and constructs vacuum
+inputs following the logic in Free.jl.
+
+Scans performed:
+1. 3D convergence: mtheta varying with nzeta/mtheta = 1.5 fixed
+2. 2D convergence: mtheta varying (standard 2D vacuum response)
+3. 2D vs 3D comparison at matched mtheta values
+
+Outputs:
+- Convergence plots
+- Runtime scaling data
+- Relative error metrics vs. high-resolution reference
+"""
+
+using Pkg
+Pkg.activate("$(@__DIR__)/../..")
+
+using JPEC
+using JPEC.Vacuum
+using JPEC.Equilibrium
+using JPEC: Spl
+using LinearAlgebra
+using Printf
+using Plots
+using TOML
+
+default(; markersize=4, linewidth=2)
+
+# ============================================================================
+# Helper functions
+# ============================================================================
+
+"""
+Compute vacuum inputs from equilibrium, following Free.jl logic.
+"""
+function compute_vacuum_inputs_from_equil(equil::PlasmaEquilibrium, mthvac::Int, mlow::Int, mpert::Int, n::Int)
+    # Get boundary flux value (psilim = 1.0 for normalized coordinates)
+    psilim = equil.config.control.psihigh
+
+    # Allocate arrays
+    mtheta_eq = equil.config.control.mtheta + 1
+    R = zeros(Float64, mtheta_eq)
+    Z = zeros(Float64, mtheta_eq)
+    ν = zeros(Float64, mtheta_eq)
+    r_minor = zeros(Float64, mtheta_eq)
+    θ_SFL = zeros(Float64, mtheta_eq)
+
+    # Compute geometric quantities on plasma boundary
+    for (i, θ) in enumerate(equil.rzphi.ys)
+        f = Spl.bicube_eval!(equil.rzphi, psilim, θ)
+        r_minor[i] = sqrt(f[1])
+        θ_SFL[i] = 2π * (θ + f[2])  # f[2] = λ(ψ, θ) / 2π
+        ν[i] = f[3]
+    end
+
+    # Compute R and Z on straight-fieldline θ grid
+    R .= equil.ro .+ r_minor .* cos.(θ_SFL)
+    Z .= equil.zo .+ r_minor .* sin.(θ_SFL)
+
+    return Vacuum.VacuumInput(;
+        r=reverse(R),
+        z=reverse(Z),
+        ν=reverse(ν),
+        mlow=mlow,
+        mpert=mpert,
+        n=n,
+        mtheta=mthvac,
+        force_wv_symmetry=true
+    )
+end
+
+"""
+Compute accuracy metrics between two wv matrices.
+Returns: (relative_frobenius_norm, max_absolute_error, max_eigenvalue_error)
+"""
+function compute_accuracy_metrics(wv_test::Matrix, wv_ref::Matrix)
+    diff = wv_test - wv_ref
+    frobenius_norm = norm(diff)
+    relative_frobenius = frobenius_norm / norm(wv_ref)
+    max_abs_error = maximum(abs.(diff))
+
+    # Eigenvalue comparison
+    ev_test = maximum(real.(eigvals(wv_test)))
+    ev_ref = maximum(real.(eigvals(wv_ref)))
+    ev_rel_error = abs(ev_test - ev_ref) / abs(ev_ref)
+
+    return relative_frobenius, max_abs_error, ev_rel_error
+end
+
+# ============================================================================
+# Setup equilibrium and parameters
+# ============================================================================
+
+println("="^70)
+println("3D VACUUM RESPONSE CONVERGENCE BENCHMARK")
+println("="^70)
+
+# Load equilibrium from Solovev example
+example_dir = joinpath(@__DIR__, "../../examples/Solovev_ideal_example")
+equil = Equilibrium.setup_equilibrium(joinpath(example_dir, "equil.toml"))
+
+# Load DCON settings to get mode numbers
+dcon_inputs = TOML.parsefile(joinpath(example_dir, "dcon.toml"))
+wall_inputs = dcon_inputs["WALL"]
+wall_settings = Vacuum.WallShapeSettings(; (Symbol(k) => v for (k, v) in wall_inputs)...)
+
+# Determine mode numbers (following Main.jl logic)
+nlow = dcon_inputs["DCON_CONTROL"]["nn_low"]
+nhigh = dcon_inputs["DCON_CONTROL"]["nn_high"]
+npert = nhigh - nlow + 1
+
+# Poloidal mode range (simplified version of Main.jl logic)
+mlow = trunc(Int, min(nlow * equil.params.qmin, 0)) - 4
+mhigh = trunc(Int, nhigh * equil.params.qmax)
+mpert = mhigh - mlow + 1
+
+println("\nEquilibrium loaded from: $example_dir")
+println("  Major radius R0 = $(equil.ro)")
+println("  q on axis       = $(equil.params.qmin)")
+println("  q at edge       = $(equil.params.qmax)")
+println("\nMode numbers:")
+println("  n range: $nlow to $nhigh (npert = $npert)")
+println("  m range: $mlow to $mhigh (mpert = $mpert)")
+println("  Total modes: $(mpert * npert)")
+
+# ============================================================================
+# Convergence scan parameters
+# ============================================================================
+
+# Fixed aspect ratio for 3D: nzeta/mtheta = 1.5
+aspect_ratio_grid = 1.5
+
+# Mtheta values chosen to give total grid points from ~300 to ~30000
+# Total points = mtheta * nzeta = mtheta * (1.5 * mtheta) = 1.5 * mtheta^2
+# mtheta=14 -> 294 points, mtheta=140 -> 29400 points
+mtheta_values = [14, 20, 28, 40, 56, 80]
+nzeta_values = [round(Int, aspect_ratio_grid * m) for m in mtheta_values]
+total_points = [m * n for (m, n) in zip(mtheta_values, nzeta_values)]
+
+# Reference resolution (high resolution)
+mtheta_ref = 112
+nzeta_ref = round(Int, aspect_ratio_grid * mtheta_ref)
+
+println("\n" * "="^70)
+println("SCAN PARAMETERS")
+println("="^70)
+println("  Grid aspect ratio (nzeta/mtheta): $aspect_ratio_grid")
+println("  Mtheta values: $mtheta_values")
+println("  Nzeta values:  $nzeta_values")
+println("  Total points:  $total_points")
+println("  Reference: mtheta=$mtheta_ref, nzeta=$nzeta_ref ($(mtheta_ref * nzeta_ref) points)")
+
+# ============================================================================
+# Compute reference solutions (3D and 2D)
+# ============================================================================
+
+println("\n" * "="^70)
+println("COMPUTING REFERENCE SOLUTIONS")
+println("="^70)
+
+# 3D reference
+println("\n3D Reference (mtheta=$mtheta_ref, nzeta=$nzeta_ref)...")
+vac_inputs_2D_ref = compute_vacuum_inputs_from_equil(equil, mtheta_ref, mlow, mpert, nlow)
+vac_inputs_3D_ref = Vacuum.VacuumInput3D(vac_inputs_2D_ref, nzeta_ref, nlow, npert)
+
+t_ref_3D = @elapsed begin
+    wv_ref_3D, _, _, _ = Vacuum.compute_vacuum_response_3D(vac_inputs_3D_ref, wall_settings)
+end
+println("  Time: $(@sprintf "%.2f" t_ref_3D) s")
+println("  wv matrix size: $(size(wv_ref_3D))")
+println("  Max eigenvalue: $(@sprintf "%.6e" maximum(real.(eigvals(wv_ref_3D))))")
+
+# 2D reference (use same mtheta as 3D reference)
+println("\n2D Reference (mtheta=$mtheta_ref)...")
+t_ref_2D = @elapsed begin
+    wv_ref_2D, _, _ = Vacuum.compute_vacuum_response(vac_inputs_2D_ref, wall_settings)
+end
+println("  Time: $(@sprintf "%.2f" t_ref_2D) s")
+println("  wv matrix size: $(size(wv_ref_2D))")
+println("  Max eigenvalue: $(@sprintf "%.6e" maximum(real.(eigvals(wv_ref_2D))))")
+
+# ============================================================================
+# 3D Convergence scan (fixed aspect ratio)
+# ============================================================================
+
+println("\n" * "="^70)
+println("3D CONVERGENCE SCAN (nzeta/mtheta = $aspect_ratio_grid)")
+println("="^70)
+
+rel_errors_3D = Float64[]
+ev_errors_3D = Float64[]
+times_3D = Float64[]
+
+for (i, mth) in enumerate(mtheta_values)
+    nz = nzeta_values[i]
+    print("  mtheta=$mth, nzeta=$nz ($(mth*nz) pts) ... ")
+
+    local vac_inputs_2D = compute_vacuum_inputs_from_equil(equil, mth, mlow, mpert, nlow)
+    local vac_inputs_3D = Vacuum.VacuumInput3D(vac_inputs_2D, nz, nlow, npert)
+
+    t = @elapsed begin
+        wv, _, _, _ = Vacuum.compute_vacuum_response_3D(vac_inputs_3D, wall_settings)
+    end
+
+    rel_err, _, ev_err = compute_accuracy_metrics(wv, wv_ref_3D)
+
+    push!(rel_errors_3D, rel_err)
+    push!(ev_errors_3D, ev_err)
+    push!(times_3D, t)
+
+    println(@sprintf "rel_err=%.2e, ev_err=%.2e, time=%.2fs" rel_err ev_err t)
+end
+
+# ============================================================================
+# 2D Convergence scan (same mtheta values)
+# ============================================================================
+
+println("\n" * "="^70)
+println("2D CONVERGENCE SCAN")
+println("="^70)
+
+rel_errors_2D = Float64[]
+ev_errors_2D = Float64[]
+times_2D = Float64[]
+
+for mth in mtheta_values
+    print("  mtheta=$mth ... ")
+
+    local vac_inputs_2D = compute_vacuum_inputs_from_equil(equil, mth, mlow, mpert, nlow)
+
+    t = @elapsed begin
+        wv, _, _ = Vacuum.compute_vacuum_response(vac_inputs_2D, wall_settings)
+    end
+
+    rel_err, _, ev_err = compute_accuracy_metrics(wv, wv_ref_2D)
+
+    push!(rel_errors_2D, rel_err)
+    push!(ev_errors_2D, ev_err)
+    push!(times_2D, t)
+
+    println(@sprintf "rel_err=%.2e, ev_err=%.2e, time=%.2fs" rel_err ev_err t)
+end
+
+# ============================================================================
+# 2D vs 3D comparison (compute difference at each mtheta)
+# ============================================================================
+
+println("\n" * "="^70)
+println("2D vs 3D COMPARISON (at matched mtheta)")
+println("="^70)
+
+rel_errors_2D_vs_3D = Float64[]
+ev_errors_2D_vs_3D = Float64[]
+
+for (i, mth) in enumerate(mtheta_values)
+    nz = nzeta_values[i]
+    print("  mtheta=$mth ... ")
+
+    local vac_inputs_2D = compute_vacuum_inputs_from_equil(equil, mth, mlow, mpert, nlow)
+    local vac_inputs_3D = Vacuum.VacuumInput3D(vac_inputs_2D, nz, nlow, npert)
+
+    # Compute both 2D and 3D
+    wv_2D, _, _ = Vacuum.compute_vacuum_response(vac_inputs_2D, wall_settings)
+    wv_3D, _, _, _ = Vacuum.compute_vacuum_response_3D(vac_inputs_3D, wall_settings)
+
+    # Compare 2D block against corresponding 3D block
+    # For n=nlow, the 2D wv corresponds to the first mpert×mpert block of the 3D wv
+    wv_3D_block = wv_3D[1:mpert, 1:mpert]
+
+    rel_err = norm(wv_2D - wv_3D_block) / norm(wv_2D)
+    ev_2D = maximum(real.(eigvals(wv_2D)))
+    ev_3D = maximum(real.(eigvals(wv_3D_block)))
+    ev_err = abs(ev_2D - ev_3D) / abs(ev_2D)
+
+    push!(rel_errors_2D_vs_3D, rel_err)
+    push!(ev_errors_2D_vs_3D, ev_err)
+
+    println(@sprintf "rel_err=%.2e, ev_err=%.2e" rel_err ev_err)
+end
+
+# ============================================================================
+# Create plots
+# ============================================================================
+
+println("\n" * "="^70)
+println("CREATING PLOTS")
+println("="^70)
+
+# Plot 1: 3D convergence vs total grid points
+p1 = plot(total_points, rel_errors_3D;
+    label="Relative Frobenius Error",
+    marker=:circle,
+    xlabel="Total grid points (mtheta × nzeta)",
+    ylabel="Relative Error",
+    title="3D Vacuum Response Convergence",
+    legend=:topright,
+    yscale=:log10,
+    xscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+plot!(p1, total_points, ev_errors_3D;
+    label="Max Eigenvalue Error",
+    marker=:square)
+
+# Plot 2: 2D convergence vs mtheta
+p2 = plot(mtheta_values, rel_errors_2D;
+    label="Relative Frobenius Error",
+    marker=:circle,
+    xlabel="mtheta (poloidal points)",
+    ylabel="Relative Error",
+    title="2D Vacuum Response Convergence",
+    legend=:topright,
+    yscale=:log10,
+    xscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+plot!(p2, mtheta_values, ev_errors_2D;
+    label="Max Eigenvalue Error",
+    marker=:square)
+
+# Plot 3: 2D vs 3D comparison vs mtheta
+p3 = plot(mtheta_values, rel_errors_2D_vs_3D;
+    label="2D vs 3D Rel. Error",
+    marker=:circle,
+    xlabel="mtheta (poloidal points)",
+    ylabel="Relative Difference (2D vs 3D)",
+    title="2D vs 3D Comparison (nzeta/mtheta = $aspect_ratio_grid)",
+    legend=:topright,
+    yscale=:log10,
+    xscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+plot!(p3, mtheta_values, ev_errors_2D_vs_3D;
+    label="Eigenvalue Rel. Diff.",
+    marker=:square)
+
+# Plot 4: Runtime comparison
+p4 = plot(total_points, times_3D;
+    label="3D (vs total points)",
+    marker=:circle,
+    xlabel="Grid points",
+    ylabel="Runtime (s)",
+    title="Runtime Scaling",
+    legend=:topleft,
+    xscale=:log10,
+    yscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+plot!(p4, mtheta_values, times_2D;
+    label="2D (vs mtheta)",
+    marker=:square)
+
+# Add O(N^2) reference lines
+N_ref_3D = total_points
+t_ref_scale_3D = times_3D[end] * (N_ref_3D ./ N_ref_3D[end]) .^ 2
+plot!(p4, N_ref_3D, t_ref_scale_3D; label="O(N²)", linestyle=:dash, color=:gray)
+
+# Combined plot
+p_combined = plot(p1, p2, p3, p4; layout=(2, 2), size=(1000, 800))
+
+# Save plots
+savefig(p1, joinpath(@__DIR__, "vacuum_3D_convergence_vs_gridpoints.pdf"))
+savefig(p2, joinpath(@__DIR__, "vacuum_2D_convergence_vs_mtheta.pdf"))
+savefig(p3, joinpath(@__DIR__, "vacuum_2D_vs_3D_comparison.pdf"))
+savefig(p_combined, joinpath(@__DIR__, "vacuum_convergence_combined.pdf"))
+
+println("Plots saved to $(joinpath(@__DIR__, "vacuum_*.pdf"))")
+
+# Display plots
+display(p_combined)
+
+# ============================================================================
+# Summary tables
+# ============================================================================
+
+println("\n" * "="^70)
+println("SUMMARY")
+println("="^70)
+
+println("\n3D Convergence (nzeta/mtheta = $aspect_ratio_grid):")
+println("  mtheta | nzeta | Total Pts |  Rel Error  |  EV Error   |  Time (s)")
+println("  " * "-"^65)
+for i in eachindex(mtheta_values)
+    @printf "  %6d | %5d | %9d |  %.2e  |  %.2e  |  %7.2f\n" mtheta_values[i] nzeta_values[i] total_points[i] rel_errors_3D[i] ev_errors_3D[i] times_3D[i]
+end
+
+println("\n2D Convergence:")
+println("  mtheta  |  Rel Error  |  EV Error   |  Time (s)")
+println("  " * "-"^50)
+for i in eachindex(mtheta_values)
+    @printf "  %6d  |  %.2e  |  %.2e  |  %7.2f\n" mtheta_values[i] rel_errors_2D[i] ev_errors_2D[i] times_2D[i]
+end
+
+println("\n2D vs 3D Comparison:")
+println("  mtheta  |  Rel Diff   |  EV Diff")
+println("  " * "-"^40)
+for i in eachindex(mtheta_values)
+    @printf "  %6d  |  %.2e  |  %.2e\n" mtheta_values[i] rel_errors_2D_vs_3D[i] ev_errors_2D_vs_3D[i]
+end
+
+println("\n" * "="^70)
+println("BENCHMARK COMPLETE")
+println("="^70)
diff --git a/benchmarks/Solovev_ideal_example/vacuum_3D_speed_benchmark.jl b/benchmarks/Solovev_ideal_example/vacuum_3D_speed_benchmark.jl
new file mode 100644
index 00000000..df9dc403
--- /dev/null
+++ b/benchmarks/Solovev_ideal_example/vacuum_3D_speed_benchmark.jl
@@ -0,0 +1,317 @@
+"""
+Speed and allocations benchmark for compute_vacuum_response_3D
+
+This script measures runtime and memory allocations as a function of grid size
+for the 3D vacuum response calculation. Uses fixed aspect ratio nzeta/mtheta.
+
+Outputs:
+- Runtime scaling vs grid size
+- Allocation scaling vs grid size
+- Breakdown of time spent in key functions
+"""
+
+using Pkg
+Pkg.activate("$(@__DIR__)/../..")
+
+using JPEC
+using JPEC.Vacuum
+using JPEC.Equilibrium
+using JPEC: Spl
+using LinearAlgebra
+using Printf
+using Plots
+using TOML
+
+default(; markersize=4, linewidth=2)
+
+# ============================================================================
+# Helper functions
+# ============================================================================
+
+"""
+Compute vacuum inputs from equilibrium, following Free.jl logic.
+"""
+function compute_vacuum_inputs_from_equil(equil::PlasmaEquilibrium, mthvac::Int, mlow::Int, mpert::Int, n::Int)
+    psilim = equil.config.control.psihigh
+    mtheta_eq = equil.config.control.mtheta + 1
+    R = zeros(Float64, mtheta_eq)
+    Z = zeros(Float64, mtheta_eq)
+    ν = zeros(Float64, mtheta_eq)
+    r_minor = zeros(Float64, mtheta_eq)
+    θ_SFL = zeros(Float64, mtheta_eq)
+
+    for (i, θ) in enumerate(equil.rzphi.ys)
+        f = Spl.bicube_eval!(equil.rzphi, psilim, θ)
+        r_minor[i] = sqrt(f[1])
+        θ_SFL[i] = 2π * (θ + f[2])
+        ν[i] = f[3]
+    end
+
+    R .= equil.ro .+ r_minor .* cos.(θ_SFL)
+    Z .= equil.zo .+ r_minor .* sin.(θ_SFL)
+
+    return Vacuum.VacuumInput(;
+        r=reverse(R),
+        z=reverse(Z),
+        ν=reverse(ν),
+        mlow=mlow,
+        mpert=mpert,
+        n=n,
+        mtheta=mthvac,
+        force_wv_symmetry=true
+    )
+end
+
+"""
+Format bytes as human-readable string.
+"""
+function format_bytes(bytes::Integer)
+    if bytes < 1024
+        return @sprintf "%d B" bytes
+    elseif bytes < 1024^2
+        return @sprintf "%.2f KB" bytes / 1024
+    elseif bytes < 1024^3
+        return @sprintf "%.2f MB" bytes / 1024^2
+    else
+        return @sprintf "%.2f GB" bytes / 1024^3
+    end
+end
+
+# ============================================================================
+# Setup equilibrium and parameters
+# ============================================================================
+
+println("="^70)
+println("3D VACUUM RESPONSE SPEED/ALLOCATIONS BENCHMARK")
+println("="^70)
+
+# Load equilibrium from Solovev example
+example_dir = joinpath(@__DIR__, "../../examples/Solovev_ideal_example")
+equil = Equilibrium.setup_equilibrium(joinpath(example_dir, "equil.toml"))
+
+# Load DCON settings to get mode numbers
+dcon_inputs = TOML.parsefile(joinpath(example_dir, "dcon.toml"))
+wall_inputs = dcon_inputs["WALL"]
+wall_settings = Vacuum.WallShapeSettings(; (Symbol(k) => v for (k, v) in wall_inputs)...)
+
+# Determine mode numbers (following Main.jl logic)
+nlow = dcon_inputs["DCON_CONTROL"]["nn_low"]
+nhigh = dcon_inputs["DCON_CONTROL"]["nn_high"]
+npert = nhigh - nlow + 1
+
+# Poloidal mode range
+mlow = trunc(Int, min(nlow * equil.params.qmin, 0)) - 4
+mhigh = trunc(Int, nhigh * equil.params.qmax)
+mpert = mhigh - mlow + 1
+
+println("\nEquilibrium loaded from: $example_dir")
+println("  Major radius R0 = $(equil.ro)")
+println("  q on axis       = $(equil.params.qmin)")
+println("  q at edge       = $(equil.params.qmax)")
+println("\nMode numbers:")
+println("  n range: $nlow to $nhigh (npert = $npert)")
+println("  m range: $mlow to $mhigh (mpert = $mpert)")
+println("  Total modes: $(mpert * npert)")
+
+# ============================================================================
+# Benchmark parameters
+# ============================================================================
+
+# Fixed aspect ratio for 3D: nzeta/mtheta = 1.5
+aspect_ratio_grid = 1.5
+
+# Grid sizes to test
+mtheta_values = [16, 32, 48, 64, 96]  # Small test set
+nzeta_values = [round(Int, aspect_ratio_grid * m) for m in mtheta_values]
+total_points = [m * n for (m, n) in zip(mtheta_values, nzeta_values)]
+
+println("\n" * "="^70)
+println("BENCHMARK PARAMETERS")
+println("="^70)
+println("  Grid aspect ratio (nzeta/mtheta): $aspect_ratio_grid")
+println("  Mtheta values: $mtheta_values")
+println("  Nzeta values:  $nzeta_values")
+println("  Total points:  $total_points")
+
+# ============================================================================
+# Warmup run (compile everything)
+# ============================================================================
+
+println("\n" * "="^70)
+println("WARMUP RUN (compiling)")
+println("="^70)
+
+mth_warmup = mtheta_values[1]
+nz_warmup = nzeta_values[1]
+print("  Running warmup with mtheta=$mth_warmup, nzeta=$nz_warmup ... ")
+
+vac_inputs_2D_warmup = compute_vacuum_inputs_from_equil(equil, mth_warmup, mlow, mpert, nlow)
+vac_inputs_3D_warmup = Vacuum.VacuumInput3D(vac_inputs_2D_warmup, nz_warmup, nlow, npert)
+Vacuum.compute_vacuum_response_3D(vac_inputs_3D_warmup, wall_settings)
+println("done")
+
+# ============================================================================
+# Speed and allocation benchmark
+# ============================================================================
+
+println("\n" * "="^70)
+println("3D SPEED/ALLOCATION BENCHMARK (nzeta/mtheta = $aspect_ratio_grid)")
+println("="^70)
+
+times_3D = Float64[]
+allocs_3D = Int[]
+alloc_bytes_3D = Int[]
+
+for (i, mth) in enumerate(mtheta_values)
+    nz = nzeta_values[i]
+    print("  mtheta=$mth, nzeta=$nz ($(mth*nz) pts) ... ")
+
+    local vac_inputs_2D = compute_vacuum_inputs_from_equil(equil, mth, mlow, mpert, nlow)
+    local vac_inputs_3D = Vacuum.VacuumInput3D(vac_inputs_2D, nz, nlow, npert)
+
+    # Run with allocation tracking
+    stats = @timed Vacuum.compute_vacuum_response_3D(vac_inputs_3D, wall_settings)
+
+    push!(times_3D, stats.time)
+    push!(allocs_3D, Base.gc_alloc_count(stats.gcstats))
+    push!(alloc_bytes_3D, stats.bytes)
+
+    println(@sprintf "time=%.3fs, allocs=%d, bytes=%s" stats.time Base.gc_alloc_count(stats.gcstats) format_bytes(stats.bytes))
+end
+
+# ============================================================================
+# Create plots
+# ============================================================================
+
+println("\n" * "="^70)
+println("CREATING PLOTS")
+println("="^70)
+
+# Plot 1: Runtime scaling
+p1 = plot(total_points, times_3D;
+    label="Measured",
+    marker=:circle,
+    xlabel="Total grid points (mtheta × nzeta)",
+    ylabel="Runtime (s)",
+    title="3D Vacuum Runtime Scaling",
+    legend=:topleft,
+    xscale=:log10,
+    yscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+
+# Add O(N²) reference line
+if length(times_3D) >= 2
+    N_ref = total_points
+    t_ref_scale = times_3D[end] * (N_ref ./ N_ref[end]) .^ 2
+    plot!(p1, N_ref, t_ref_scale; label="O(N²)", linestyle=:dash, color=:gray)
+end
+
+# Plot 2: Allocation count scaling
+p2 = plot(total_points, allocs_3D;
+    label="Allocation count",
+    marker=:circle,
+    xlabel="Total grid points (mtheta × nzeta)",
+    ylabel="Number of allocations",
+    title="Allocation Count Scaling",
+    legend=:topleft,
+    xscale=:log10,
+    yscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+
+# Add O(N) and O(N²) reference lines for allocations
+if length(allocs_3D) >= 2
+    N_ref = total_points
+    alloc_ref_N = allocs_3D[end] * (N_ref ./ N_ref[end])
+    alloc_ref_N2 = allocs_3D[end] * (N_ref ./ N_ref[end]) .^ 2
+    plot!(p2, N_ref, alloc_ref_N; label="O(N)", linestyle=:dash, color=:blue)
+    plot!(p2, N_ref, alloc_ref_N2; label="O(N²)", linestyle=:dash, color=:red)
+end
+
+# Plot 3: Memory allocation scaling
+p3 = plot(total_points, alloc_bytes_3D ./ 1e6;
+    label="Memory allocated",
+    marker=:circle,
+    xlabel="Total grid points (mtheta × nzeta)",
+    ylabel="Memory allocated (MB)",
+    title="Memory Allocation Scaling",
+    legend=:topleft,
+    xscale=:log10,
+    yscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+
+# Add O(N²) reference line for memory
+if length(alloc_bytes_3D) >= 2
+    N_ref = total_points
+    mem_ref_N2 = alloc_bytes_3D[end] * (N_ref ./ N_ref[end]) .^ 2 ./ 1e6
+    plot!(p3, N_ref, mem_ref_N2; label="O(N²)", linestyle=:dash, color=:gray)
+end
+
+# Plot 4: Allocations per grid point
+allocs_per_point = allocs_3D ./ total_points
+p4 = plot(total_points, allocs_per_point;
+    label="Allocs per grid point",
+    marker=:circle,
+    xlabel="Total grid points (mtheta × nzeta)",
+    ylabel="Allocations / grid point",
+    title="Allocations per Grid Point",
+    legend=:topright,
+    xscale=:log10,
+    minorgrid=true,
+    framestyle=:box)
+
+# Combined plot
+p_combined = plot(p1, p2, p3, p4; layout=(2, 2), size=(1000, 800))
+
+# Save plots
+savefig(p1, joinpath(@__DIR__, "vacuum_3D_runtime_scaling.pdf"))
+savefig(p2, joinpath(@__DIR__, "vacuum_3D_allocation_count_scaling.pdf"))
+savefig(p3, joinpath(@__DIR__, "vacuum_3D_memory_scaling.pdf"))
+savefig(p_combined, joinpath(@__DIR__, "vacuum_3D_speed_benchmark_combined.pdf"))
+
+println("Plots saved to $(joinpath(@__DIR__, "vacuum_3D_*.pdf"))")
+
+# Display plots
+display(p_combined)
+
+# ============================================================================
+# Summary table
+# ============================================================================
+
+println("\n" * "="^70)
+println("SUMMARY")
+println("="^70)
+
+println("\n3D Speed/Allocation Benchmark (nzeta/mtheta = $aspect_ratio_grid):")
+println("  mtheta | nzeta | Total Pts |  Time (s)  |   Allocs   |   Memory")
+println("  " * "-"^70)
+for i in eachindex(mtheta_values)
+    @printf "  %6d | %5d | %9d |  %8.3f  | %10d | %10s\n" mtheta_values[i] nzeta_values[i] total_points[i] times_3D[i] allocs_3D[i] format_bytes(alloc_bytes_3D[i])
+end
+
+# Compute scaling exponents if we have enough data points
+if length(mtheta_values) >= 2
+    println("\nScaling Analysis:")
+
+    # Runtime scaling: t ∝ N^α
+    log_N = log.(total_points)
+    log_t = log.(times_3D)
+    α_time = (log_t[end] - log_t[1]) / (log_N[end] - log_N[1])
+    println(@sprintf "  Runtime scaling exponent: α = %.2f (expect ~2 for O(N²))" α_time)
+
+    # Allocation scaling: allocs ∝ N^β
+    log_allocs = log.(allocs_3D)
+    β_allocs = (log_allocs[end] - log_allocs[1]) / (log_N[end] - log_N[1])
+    println(@sprintf "  Allocation count scaling exponent: β = %.2f" β_allocs)
+
+    # Memory scaling: bytes ∝ N^γ
+    log_bytes = log.(alloc_bytes_3D)
+    γ_mem = (log_bytes[end] - log_bytes[1]) / (log_N[end] - log_N[1])
+    println(@sprintf "  Memory scaling exponent: γ = %.2f (expect ~2 for O(N²) matrices)" γ_mem)
+end
+
+println("\n" * "="^70)
+println("BENCHMARK COMPLETE")
+println("="^70)
diff --git a/examples/Solovev_ideal_example_3D/dcon.toml b/examples/Solovev_ideal_example_3D/dcon.toml
new file mode 100644
index 00000000..46dbef2c
--- /dev/null
+++ b/examples/Solovev_ideal_example_3D/dcon.toml
@@ -0,0 +1,51 @@
+[DCON_CONTROL]
+bal_flag = false              # Ideal MHD ballooning criterion for short wavelengths
+mat_flag = true               # Construct coefficient matrices for diagnostic purposes
+ode_flag = true               # Integrate ODE's for determining stability of internal long-wavelength mode (must be true for GPEC)
+vac_flag = true               # Compute plasma, vacuum, and total energies for free-boundary modes
+mer_flag = true               # Evaluate the Mercier criterian
+
+set_psilim_via_dmlim = false  # Safety factor (q) limit determined as q_ir+dmlim...
+dmlim = 0.2                   # See sas_flag
+qlow = 1.02                   # Integration initiated at q determined by min(q0, qlow)...
+qhigh = 1e3                   # Integration terminated at q limit determined by min(qa, qhigh)...
+sing_start = 0                # Start integration at the sing_start'th rational from the axis (psilow)
+
+nn_low = 1                    # Smallest toroidal mode number to include
+nn_high = 1                   # Largest toroidal mode number to include
+delta_mlow = 8                # Expands lower bound of Fourier harmonics
+delta_mhigh = 8               # Expands upper bound of Fourier harmonics
+delta_mband = 0               # Integration keeps only this wide a band...
+mthvac = 128                  # Number of points used in splines over poloidal angle at plasma-vacuum interface.
+nzvac = 64
+thmax0 = 1                    # Linear multiplier on the automatic choice of theta integration bounds
+
+kin_flag = false              # Kinetic EL equation (default: false)
+con_flag = false              # Continue integration through layers (default: false)
+kinfac1 = 1.0                 # Scale factor for energy contribution (default: 1.0)
+kinfac2 = 1.0                 # Scale factor for torque contribution (default: 1.0)
+kingridtype = 0               # Regular grid method (default: 0)
+passing_flag = true           # Includes passing particle effects (default: false)
+ktanh_flag = true             # Ignore kinetic effects in the core smoothly (default: false)
+ktc = 0.1                     # Parameter for ktanh_flag (default: 0.1)
+ktw = 50.0                    # Parameter for ktanh_flag (default: 50.0)
+ion_flag = true               # Include ion dW_k when kin_flag is true
+electron_flag = false         # Include electron dW_k when kin_flag is true
+
+tol_nr = 1e-6                 # Relative tolerance of dynamic integration steps away from rationals
+tol_r = 1e-7                  # Relative tolerance of dynamic integration steps near rationals
+crossover = 1e-2              # Fractional distance from rational q at which tol switches
+singfac_min = 1e-4            # Fractional distance from rational q at which ideal jump enforced
+ucrit = 1e3                   # Maximum fraction of solutions allowed before re-normalized
+force_wv_symmetry = true      # Forces vacuum energy matrix symmetry
+save_interval = 10            # Save every Nth ODE step (1=all, 10=every 10th). Always saves near rational surfaces.
+
+[WALL]
+shape = "nowall"           # String selecting wall shape ["nowall", "conformal", "elliptical", "dee", "mod_dee", "from_file"]
+a = 0.2415                    # The distance of the wall from the plasma in units of major radius (conformal), or minor radius parameter (others).
+aw = 0.05                     # Half-thickness of the wall.
+bw = 1.5                      # Elongation.
+cw = 0                        # Offset of the center of the wall from the major radius.
+dw = 0.5                      # Triangularity
+tw = 0.05                     # Sharpness of the corners of the wall. Try 0.05 as a good initial value.
+equal_arc_wall = false         # Flag to enforce equal arcs distribution of the nodes on the wall. Best results unless the wall is very close to the plasma.
diff --git a/examples/Solovev_ideal_example_3D/equil.toml b/examples/Solovev_ideal_example_3D/equil.toml
new file mode 100644
index 00000000..7abe0a9d
--- /dev/null
+++ b/examples/Solovev_ideal_example_3D/equil.toml
@@ -0,0 +1,30 @@
+[EQUIL_CONTROL]
+eq_type = "sol"             # Type of the input 2D equilibrium file
+eq_filename = "sol.toml"  # path to equilibrium file
+
+jac_type = "pest"          # Coordinate system (hamada, pest, boozer, equal_arc)
+power_bp = 0                 # del.B ~ B_p^power_bp * B^power_b / R^power_r
+power_b = 0                  # del.B ~ B_p^power_bp * B^power_b / R^power_r
+power_r = 0                  # del.B ~ B_p^power_bp * B^power_b / R^power_r
+
+grid_type = "ldp"            # Radial grid packing
+psilow = 1e-4                # Min psi (normalized)
+psihigh = 0.99999              # Max psi (normalized)
+mpsi = 128                   # Radial grid intervals
+mtheta = 256                 # Poloidal grid intervals
+
+newq0 = 0                    # Override q(0)
+etol = 1e-7                  # Reconstruction tolerance
+use_classic_splines = false  # Use classical spline (vs. tri-diagonal)
+
+input_only = false           # Quit after input read
+
+[EQUIL_OUTPUT]
+gse_flag = false           # Output G-S equation accuracy diagnostics
+out_eq_1d = false          # ASCII output of 1D eq data
+bin_eq_1d = false          # Binary output of 1D eq data
+out_eq_2d = false          # ASCII output of 2D eq data
+bin_eq_2d = true           # Binary output of 2D eq data (used by GPEC)
+out_2d = false             # ASCII output of processed 2D data
+bin_2d = false             # Binary output of processed 2D data
+dump_flag = false          # Binary dump of equilibrium data
diff --git a/examples/Solovev_ideal_example_3D/sol.toml b/examples/Solovev_ideal_example_3D/sol.toml
new file mode 100644
index 00000000..303a6ec0
--- /dev/null
+++ b/examples/Solovev_ideal_example_3D/sol.toml
@@ -0,0 +1,11 @@
+[SOL_INPUT]
+mr = 128      # number of radial grid zones
+mz = 128      # number of axial grid zones
+ma = 128      # number of flux grid zones
+e = 1.0       # elongation
+a = 1.0      # minor radius
+r0 = 5.0      # major radius
+q0 = 1.9      # safety factor at the o-point
+p0fac=1       # scale on-axis pressure (P-> P+P0*p0fac. beta changes. Phi,q constant)
+b0fac=1       # scale toroidal field at constant beta (s*Phi,s*f,s^2*P. bt changes. Shape,beta constant)
+f0fac=1       # scale toroidal field at constant pressure (s*f. beta,q changes. Phi,p,bp constant)
diff --git a/src/DCON/DconStructs.jl b/src/DCON/DconStructs.jl
index 7ef65741..065d1daa 100644
--- a/src/DCON/DconStructs.jl
+++ b/src/DCON/DconStructs.jl
@@ -211,6 +211,7 @@ A mutable struct containing control parameters for stability analysis, set by th
     mer_flag::Bool = false
     fft_flag::Bool = false
     mthvac::Int = 480
+    nzvac::Int = 64
     sing_start::Int = 0
     nn_low::Int = 0
     nn_high::Int = 0
@@ -288,7 +289,8 @@ Populated in `Free.jl`.
 ## Fields
 
   - `mthvac::Int` - Number of vacuum poloidal grid points (corresponds to `mtheta` in VacuumInput)
-  - `mpert::Int` - Number of poloidal modes
+
+  - `nzvac::Int` - Number of vacuum toroidal grid points (corresponds to `nzeta` in VacuumInput)
   - `numpert_total::Int` - Total number of modes (mpert × npert)
   - `wt::Array{ComplexF64, 2}` - Toroidal vacuum response matrix (numpert_total × numpert_total)
   - `wt0::Array{ComplexF64, 2}` - Reference toroidal vacuum matrix (numpert_total × numpert_total)
@@ -296,12 +298,16 @@ Populated in `Free.jl`.
   - `ep::Vector{ComplexF64}` - Plasma eigenvalues
   - `ev::Vector{ComplexF64}` - Vacuum eigenvalues
   - `et::Vector{ComplexF64}` - Total eigenvalues of plasma + vacuum
-  - `grri::Array{Float64, 2}` - Green's function radial integrals (2×mthvac × 2×mpert)
-  - `xzpts::Array{Float64, 2}` - Coordinate points [R_plasma, Z_plasma, R_wall, Z_wall] (mthvac × 4)
+  - `green_fourier::Array{Float64, 2}` - Fourier transformed Green's function (2 * mthvac * nzvac x 2 * numpert_total)
+
+      + First mthvac*nzvac rows correspond to plasma, second to wall
+      + First numpert_total columns correspond to real cosine part, second to imaginary sine part
+  - `plasma_coords::Array{Float64, 2}` - Cartesian coordinate points of plasma surface (mthvac * nzvac x 3)
+  - `wall_coords::Array{Float64, 2}` - Cartesian coordinate points of wall surface (mthvac * nzvac x 3)
 """
 @kwdef mutable struct VacuumData
     mthvac::Int
-    mpert::Int
+    nzvac::Int
     numpert_total::Int
 
     wt::Array{ComplexF64,2} = Array{ComplexF64}(undef, numpert_total, numpert_total)
@@ -310,14 +316,12 @@ Populated in `Free.jl`.
     ep::Vector{ComplexF64} = Vector{ComplexF64}(undef, numpert_total)
     ev::Vector{ComplexF64} = Vector{ComplexF64}(undef, numpert_total)
     et::Vector{ComplexF64} = Vector{ComplexF64}(undef, numpert_total)
-
-    # VACUUM can't handle 3D yet, so these are temporary mpert arrays
-    # TODO: Matt separated grri into a few arrays for IPEC, will need to do that later
-    grri::Array{Float64,2} = Array{Float64}(undef, 2 * mthvac, 2 * mpert)
-    xzpts::Array{Float64,2} = Array{Float64}(undef, mthvac, 4)
+    green_fourier::Array{Float64,2} = Array{Float64}(undef, 2 * mthvac * nzvac, 2 * numpert_total)
+    plasma_coords::Array{Float64,2} = Array{Float64}(undef, mthvac * nzvac, 3)
+    wall_coords::Array{Float64,2} = Array{Float64}(undef, mthvac * nzvac, 3)
 end
 
-VacuumData(mthvac::Int, mpert::Int, numpert_total::Int) = VacuumData(; mthvac, mpert, numpert_total)
+VacuumData(mthvac::Int, nzvac::Int, numpert_total::Int) = VacuumData(; mthvac, nzvac, numpert_total)
 
 """
 OdeState
@@ -433,60 +437,3 @@ and a small set of temporary matrices and factors used to compute singular-layer
 end
 
 OdeState(numpert_total::Int, numsteps_init::Int, numunorms_init::Int, msing::Int) = OdeState(; numpert_total, numsteps_init, numunorms_init, msing)
-
-# Below here are debug output structs used for benchmarking and unit testing
-
-
-
-
-"""
-    VacuumBenchmarkInputs
-
-A struct to hold all inputs required for vacuum benchmarking between Fortran and Julia implementations.
-
-## Fields
-
-  - `wv_block::Matrix{ComplexF64}` - Vacuum response matrix block
-  - `mpert::Int` - Number of poloidal modes
-  - `mtheta_eq::Int` - Number of poloidal grid points in input equilibrium (corresponds to `mtheta_eq` in VacuumInput)
-  - `mthvac::Int` - Number of poloidal grid points in vacuum calculations (corresponds to `mtheta` in VacuumInput)
-  - `complex_flag::Bool` - Flag indicating if complex arithmetic is used
-  - `kernelsign::Float64` - Sign of the kernel for vacuum calculation
-  - `wall_flag::Bool` - Flag indicating presence of wall
-  - `farwall_flag::Bool` - Flag indicating presence of far wall
-  - `grri::Matrix{Float64}` - Green's function response matrix
-  - `xzpts::Matrix{Float64}` - Coordinate points on plasma boundary [R, Z]
-  - `ahg_file::String` - Filename for AHG data
-  - `dir_path::String` - Directory path for input/output files
-  - `vac_inputs::Vacuum.VacuumInput` - VacuumInput struct for Julia vacuum code
-  - `wall_settings::Vacuum.WallShapeSettings` - Wall shape settings
-  - `n::Int` - Toroidal mode number
-  - `ipert_n::Int` - Index of perturbed toroidal mode
-  - `psifac::Float64` - Normalized flux coordinate
-"""
-@kwdef struct VacuumBenchmarkInputs
-    # Vacuum computation parameters
-    wv_block::Matrix{ComplexF64}
-    mpert::Int
-    mtheta_eq::Int
-    mthvac::Int
-    complex_flag::Bool
-    kernelsign::Float64
-    wall_flag::Bool
-    farwall_flag::Bool
-    grri::Matrix{Float64}
-    xzpts::Matrix{Float64}
-    ahg_file::String
-    dir_path::String
-
-    # VacuumInput struct for Julia code
-    vac_inputs::Vacuum.VacuumInput
-
-    # Wall settings
-    wall_settings::Vacuum.WallShapeSettings
-
-    # Additional context
-    n::Int
-    ipert_n::Int
-    psifac::Float64
-end
diff --git a/src/DCON/Free.jl b/src/DCON/Free.jl
index bb8673cc..7874f38e 100644
--- a/src/DCON/Free.jl
+++ b/src/DCON/Free.jl
@@ -10,7 +10,7 @@ and data dumping.
 function free_run!(odet::OdeState, ctrl::DconControl, equil::Equilibrium.PlasmaEquilibrium, ffit::FourFitVars, intr::DconInternal)
 
     # Initializations and allocations
-    vac = VacuumData(ctrl.mthvac, intr.mpert, intr.numpert_total)
+    vac = VacuumData(ctrl.mthvac, ctrl.nzvac, intr.numpert_total)
     etemp = zeros(ComplexF64, intr.numpert_total)
     wp = zeros(ComplexF64, intr.numpert_total, intr.numpert_total)
     wpt = zeros(ComplexF64, intr.numpert_total, intr.numpert_total)
@@ -29,25 +29,10 @@ function free_run!(odet::OdeState, ctrl::DconControl, equil::Equilibrium.PlasmaE
         # Set VACUUM run parameters and boundary shape
         n = ipert_n - 1 + intr.nlow
         vac_inputs = compute_vacuum_inputs(intr.psilim, n, ctrl, equil, intr)
-        fill!(vac.grri, 0.0)
-        fill!(vac.xzpts, 0.0)
+        fill!(vac.green_fourier, 0.0)
 
         # Compute block of vacuum energy matrix for one toroidal mode number
-        wv_block, vac.grri, vac.xzpts = Vacuum.compute_vacuum_response(vac_inputs, intr.wall_settings)
-
-        # Output data for unit testing and benchmarking
-        if intr.debug_settings.output_benchmark_data
-            @info "Outputting top level vacuum debug data for n = $n"
-            farwall_flag = intr.wall_settings.shape == "nowall" ? true : false
-            benchmark_inputs = VacuumBenchmarkInputs(
-                wv_block, intr.mpert, equil.config.control.mtheta, ctrl.mthvac,
-                true, vac_inputs.kernelsign, false,
-                farwall_flag, vac.grri, vac.xzpts, "ahg2msc_dcon.out", intr.dir_path,
-                vac_inputs, intr.wall_settings,
-                n, ipert_n, intr.psilim
-            )
-            @save "vacuum_response_inputs.jld2" benchmark_inputs
-        end
+        wv_block, vac.green_fourier, vac.plasma_coords, vac.wall_coords = Vacuum.compute_vacuum_response(vac_inputs, intr.wall_settings)
 
         # Equation 126 in Chance 1997 - scale by (m - n*q)(m' - n*q)
         singfac = collect(intr.mlow:intr.mhigh) .- (n * intr.qlim)
@@ -60,6 +45,45 @@ function free_run!(odet::OdeState, ctrl::DconControl, equil::Equilibrium.PlasmaE
         @views vac.wv[((ipert_n-1)*intr.mpert+1):(ipert_n*intr.mpert), ((ipert_n-1)*intr.mpert+1):(ipert_n*intr.mpert)] .= wv_block
     end
 
+    if ctrl.nzvac > 1
+        if ctrl.verbose
+            println("Computing 3D vacuum response matrix in addition to 2D matrix with nzvac = $(ctrl.nzvac)")
+        end
+
+        # Compute 3D vacuum response matrix
+        vac_inputs = compute_vacuum_inputs(intr.psilim, 1, ctrl, equil, intr) # n doesn't matter here
+        vac_inputs_3D = Vacuum.VacuumInput3D(vac_inputs, ctrl.nzvac, intr.nlow, intr.npert)
+        t_3d = @elapsed wv3D, _, _, _ = Vacuum.compute_vacuum_response_3D(vac_inputs_3D, intr.wall_settings)
+        if ctrl.verbose
+            println("3D vacuum response computation time: $(round(t_3d, digits=4))s")
+        end
+
+        # Scale by (m - n*q)(m' - n'*q)
+        singfac = vec((intr.mlow:intr.mhigh) .- intr.qlim .* (intr.nlow:intr.nhigh)')
+        @inbounds for ipert in 1:intr.numpert_total
+            @views wv3D[ipert, :] .*= singfac[ipert]
+            @views wv3D[:, ipert] .*= singfac[ipert]
+        end
+
+        # DEBUG
+        println("2D Vacuum response matrix wv:")
+        display(vac.wv)
+        println("3D Vacuum response matrix wv3D:")
+        display(wv3D)
+        println("Maximum relative difference between 2D and 3D vacuum response matrices:")
+        display(maximum(abs.(vac.wv .- wv3D)) / maximum(abs.(vac.wv)))
+        println("Maximum difference between 2D and 3D vacuum response matrices:")
+        display(maximum(abs.(vac.wv .- wv3D)))
+        println("Relative difference in maximum eigenvalue:")
+        display((maximum(real.(eigvals(vac.wv))) - maximum(real.(eigvals(wv3D)))) / maximum(real.(eigvals(vac.wv))))
+        println("Difference in maximum eigenvalue:")
+        display((maximum(real.(eigvals(vac.wv))) - maximum(real.(eigvals(wv3D)))))
+        error("Vacuum response matrix computation complete.")
+    end
+
+    println("2D Vacuum response matrix wv:")
+    display(vac.wv)
+
     # Compute complex energy eigenvalues and vectors
     vac.wt .= wp .+ vac.wv
     vac.wt0 .= vac.wt
@@ -96,7 +120,6 @@ function free_run!(odet::OdeState, ctrl::DconControl, equil::Equilibrium.PlasmaE
     end
 
     # Compute plasma and vacuum contributions.
-    # wpt = wt' * wp * wt  ; wvt = wt' * wv * wt
     wpt .= adjoint(vac.wt) * (wp * vac.wt)
     wvt .= adjoint(vac.wt) * (vac.wv * vac.wt)
     for ipert in 1:intr.numpert_total
@@ -138,43 +161,46 @@ the r, z, and ν values at the plasma boundary, as well as mode numbers and numb
 
 ### Arguments
 
-  - `psifac`: Flux surface value at the plasma boundary (Float64)
+  - `ψ`: Flux surface value at the plasma boundary (Float64)
   - `n`: Toroidal mode number (Int)
   - `ctrl`: DCON control parameters (DconControl)
   - `equil`: Plasma equilibrium data (Equilibrium.PlasmaEquilibrium)
   - `intr`: Internal DCON parameters (DconInternal)
 """
-function compute_vacuum_inputs(psifac::Float64, n::Int, ctrl::DconControl, equil::Equilibrium.PlasmaEquilibrium, intr::DconInternal)
+function compute_vacuum_inputs(ψ::Float64, n::Int, ctrl::DconControl, equil::Equilibrium.PlasmaEquilibrium, intr::DconInternal)
 
     # Allocations
-    theta_norm = Vector(equil.rzphi.ys)
     mtheta = equil.config.control.mtheta + 1
-    angle = zeros(Float64, mtheta)
-    r = zeros(Float64, mtheta)
-    z = zeros(Float64, mtheta)
+    θ_SFL = zeros(Float64, mtheta)
+    R = zeros(Float64, mtheta)
+    Z = zeros(Float64, mtheta)
     ν = zeros(Float64, mtheta)
-    rfac = zeros(Float64, mtheta)
-
-    # Compute r, z, and ν at the plasma boundary
-    for itheta in 1:mtheta
-        f = Spl.bicube_eval!(equil.rzphi, psifac, theta_norm[itheta])
-        rfac[itheta] = sqrt(f[1])
-        angle[itheta] = 2π * (theta_norm[itheta] + f[2])
-        ν[itheta] = f[3]
+    r_minor = zeros(Float64, mtheta)
+
+    # Compute geometric quantities on plasma boundary
+    for (i, θ) in enumerate(equil.rzphi.ys)
+        f = Spl.bicube_eval!(equil.rzphi, ψ, θ)
+        r_minor[i] = sqrt(f[1])
+        θ_SFL[i] = 2π * (θ + f[2]) # f[2] = λ(ψ, θ) / 2π
+        ν[i] = f[3]
     end
-    r .= equil.ro .+ rfac .* cos.(angle)
-    z .= equil.zo .+ rfac .* sin.(angle)
 
-    # Invert values for n < 0
+    # Compute R and Z on straight-fieldline θ grid
+    R .= equil.ro .+ r_minor .* cos.(θ_SFL)
+    Z .= equil.zo .+ r_minor .* sin.(θ_SFL)
+
+    # Invert values for n < 0 # TODO: move this to VACUUM?
+    # This is here because the only thing that changes in 2D for n<0 is (mθ - nν)
+    # since Gⁿ = G⁻ⁿ and Kⁿ = K⁻ⁿ? Confirm later and generalize to 3D
     if n < 0
         ν .= -ν
         n = -n
     end
 
     return Vacuum.VacuumInput(;
-        r=reverse(r),
-        z=reverse(z),
-        ν=reverse(ν),
+        r=R,
+        z=Z,
+        ν=ν,
         mlow=intr.mlow,
         mpert=intr.mpert,
         n=n,
diff --git a/src/DCON/Main.jl b/src/DCON/Main.jl
index 10789bec..15bf4aca 100644
--- a/src/DCON/Main.jl
+++ b/src/DCON/Main.jl
@@ -314,10 +314,12 @@ function write_outputs_to_HDF5(ctrl::DconControl, equil::Equilibrium.PlasmaEquil
             out_h5["vacuum/ep"] = vac.ep
             out_h5["vacuum/ev"] = vac.ev
             out_h5["vacuum/et"] = vac.et
-            out_h5["vacuum/x_plasma"] = vac.xzpts[:, 1]
-            out_h5["vacuum/z_plasma"] = vac.xzpts[:, 2]
-            out_h5["vacuum/x_wall"] = vac.xzpts[:, 3]
-            out_h5["vacuum/z_wall"] = vac.xzpts[:, 4]
+            out_h5["vacuum/x_plasma"] = vac.plasma_coords[:, 1]
+            out_h5["vacuum/y_plasma"] = vac.plasma_coords[:, 2]
+            out_h5["vacuum/z_plasma"] = vac.plasma_coords[:, 3]
+            out_h5["vacuum/x_wall"] = vac.wall_coords[:, 1]
+            out_h5["vacuum/y_wall"] = vac.wall_coords[:, 2]
+            out_h5["vacuum/z_wall"] = vac.wall_coords[:, 3]
         end
     end
 end
diff --git a/src/Vacuum/DataTypes.jl b/src/Vacuum/DataTypes.jl
index 122feb59..8d3dfbdd 100644
--- a/src/Vacuum/DataTypes.jl
+++ b/src/Vacuum/DataTypes.jl
@@ -28,53 +28,60 @@ Struct holding plasma boundary and mode data as provided from DCON namelist and
 end
 
 """
-    PlasmaGeometry
+    VacuumInput3D
 
-Struct holding plasma geometry data on the mtheta grid for vacuum calculations. Arrays are
-of length `mtheta`, where `mtheta` is the number of poloidal grid points and θ ∈ [0, 1).
-It also precomputes trigonometric basis functions needed for Fourier calculations into matrices
-of size (mtheta, mpert), where `mpert` is the number of poloidal modes.
+Struct holding 2D plasma boundary for a 3D VACUUM run `and mode data as provided from DCON namelist and computed quantities.
+2D contour becomes a 3D axisymmetric surface by toroidal extrusion.
 
 # Fields
 
-  - `x::Vector{Float64}`: Plasma surface R-coordinate on VACUUM theta grid
-  - `z::Vector{Float64}`: Plasma surface Z-coordinate on VACUUM theta grid
-  - `dx_dtheta::Vector{Float64}`: Derivative dR/dθ at plasma surface
-  - `dz_dtheta::Vector{Float64}`: Derivative dZ/dθ at plasma surface
-  - `sin_ln_basis::Matrix{Float64}`: sin(lθ - nν) basis functions for poloidal modes at plasma surface
-  - `cos_ln_basis::Matrix{Float64}`: cos(lθ - nν) basis functions for poloidal modes at plasma surface
+  - `x::Vector{Float64}`: Plasma boundary X-coordinate on DCON theta grid
+  - `z::Vector{Float64}`: Plasma boundary Z-coordinate on DCON theta grid
+  - `ν::Vector{Float64}`: Free parameter in specifying toroidal angle, ϕ = 2πζ + ν(ψ, θ), on DCON theta grid
+  - `mlow::Int`: Lower poloidal mode number
+  - `mpert::Int`: Number of poloidal modes
+  - `nlow::Int`: Lower toroidal mode number
+  - `npert::Int`: Number of toroidal modes
+  - `mtheta::Int`: Number of poloidal collocation points
+  - `nzeta::Int`: Number of toroidal collocation points
+  - `kernelsign::Float64`: Sign for kernel; +1 or -1, only ≠ 1 for mutual inductance calculations
+  - `force_wv_symmetry::Bool`: Boolean flag to enforce symmetry in the vacuum response matrix (set in dcon.toml)
 """
-struct PlasmaGeometry
-    x::Vector{Float64}
-    z::Vector{Float64}
-    dx_dtheta::Vector{Float64}
-    dz_dtheta::Vector{Float64}
-    sin_ln_basis::Matrix{Float64}
-    cos_ln_basis::Matrix{Float64}
+@kwdef struct VacuumInput3D
+    x::Vector{Float64} = Float64[]
+    z::Vector{Float64} = Float64[]
+    ν::Vector{Float64} = Float64[]
+    mlow::Int = 0
+    mpert::Int = 0
+    nlow::Int = 0
+    npert::Int = 0
+    n::Int = 0
+    mtheta::Int = 1
+    nzeta::Int = 1
+    kernelsign::Float64 = 1.0
+    force_wv_symmetry::Bool = true
 end
 
 """
-    WallGeometry
-
-Struct holding wall geometry data for vacuum calculations. Arrays are of length
-`mtheta`, where `mtheta` is the number of poloidal grid points and θ ∈ [0, 1).
+    VacuumInput3D(inputs_2D::VacuumInput, nzeta::Int, nlow::Int, npert::Int)
 
-# Fields
-
-  - `nowall::Bool`: Boolean flag indicating if there is no wall
-  - `is_closed_toroidal::Bool`: Boolean flag indicating if the wall is a closed toroidal surface
-  - `x::Vector{Float64}`: Wall R-coordinates
-  - `z::Vector{Float64}`: Wall Z-coordinates
-  - `dx_dtheta::Vector{Float64}`: Derivative dR/dθ at wall
-  - `dz_dtheta::Vector{Float64}`: Derivative dZ/dθ at wall
+Convenience constructor for a 3D VacuumInput3D struct from a 2D VacuumInput with the additional
+required parameters for the toroidal grid/modes.
 """
-struct WallGeometry
-    nowall::Bool
-    is_closed_toroidal::Bool
-    x::Vector{Float64}
-    z::Vector{Float64}
-    dx_dtheta::Vector{Float64}
-    dz_dtheta::Vector{Float64}
+function VacuumInput3D(inputs_2D::VacuumInput, nzeta::Int, nlow::Int, npert::Int)
+    return VacuumInput3D(;
+        x=inputs_2D.r,
+        z=inputs_2D.z,
+        ν=inputs_2D.ν,
+        mlow=inputs_2D.mlow,
+        mpert=inputs_2D.mpert,
+        nlow=nlow,
+        npert=npert,
+        mtheta=inputs_2D.mtheta,
+        nzeta=nzeta,
+        kernelsign=inputs_2D.kernelsign,
+        force_wv_symmetry=inputs_2D.force_wv_symmetry
+    )
 end
 
 """
@@ -120,9 +127,36 @@ Struct containing input settings for vacuum wall geometry.
 end
 
 """
-    initialize_plasma_surface(inputs::VacuumInput) -> PlasmaGeometry
+    PlasmaGeometry
+
+Struct holding plasma geometry data on the mtheta grid for vacuum calculations. Arrays are
+of length `mtheta`, where `mtheta` is the number of poloidal grid points and θ ∈ [0, 1).
+It also precomputes trigonometric basis functions needed for Fourier calculations into matrices
+of size (mtheta, mpert), where `mpert` is the number of poloidal modes.
+
+# Fields
+
+  - `x::Vector{Float64}`: Plasma surface R-coordinate on VACUUM theta grid
+  - `z::Vector{Float64}`: Plasma surface Z-coordinate on VACUUM theta grid
+  - `dx_dtheta::Vector{Float64}`: Derivative dR/dθ at plasma surface
+  - `dz_dtheta::Vector{Float64}`: Derivative dZ/dθ at plasma surface
+  - `sin_mn_basis::Matrix{Float64}`: sin(mθ - nν) basis functions for poloidal modes at plasma surface
+  - `cos_mn_basis::Matrix{Float64}`: cos(mθ - nν) basis functions for poloidal modes at plasma surface
+"""
+@kwdef struct PlasmaGeometry
+    x::Vector{Float64} = Float64[]
+    z::Vector{Float64} = Float64[]
+    ν::Vector{Float64} = Float64[]
+    dx_dtheta::Vector{Float64} = Float64[]
+    dz_dtheta::Vector{Float64} = Float64[]
+    sin_mn_basis::Matrix{Float64} = zeros(1, 1)
+    cos_mn_basis::Matrix{Float64} = zeros(1, 1)
+end
+
+"""
+    PlasmaGeometry(inputs::VacuumInput)
 
-Initialize the plasma surface geometry based on the provided vacuum inputs.
+Contructor to initialize the plasma surface geometry based on the provided vacuum inputs.
 
 This function performs functionality from `readahg`, `arrays`, and `funint` in the
 original Fortran VACUUM code. It returns a `PlasmaGeometry` struct containing
@@ -143,44 +177,204 @@ the necessary plasma surface data for vacuum calculations.
 
   - `PlasmaGeometry`: Struct containing plasma surface coordinates, derivatives, and basis functions
 """
-function initialize_plasma_surface(inputs::VacuumInput)
+function PlasmaGeometry(inputs::VacuumInput)
 
     (; mtheta, mpert, mlow, ν, r, z, n) = inputs
     # Interpolate arrays from input onto mtheta grid
-    x_plasma = interp_to_new_grid(r, mtheta)
-    z_plasma = interp_to_new_grid(z, mtheta)
+    R = interp_to_new_grid(r, mtheta)
+    Z = interp_to_new_grid(z, mtheta)
     ν = interp_to_new_grid(ν, mtheta)
 
-    # Plasma boundary theta derivative: length mth with θ = [0, 1)
+    # Plasma boundary theta derivative: for splines, need to add periodic point
     θ_grid = range(; start=0, length=mtheta, step=2π/mtheta)
-    dx_dtheta = periodic_cubic_deriv(θ_grid, x_plasma)
-    dz_dtheta = periodic_cubic_deriv(θ_grid, z_plasma)
+    θ_grid_periodic = range(; start=0, length=mtheta+1, step=2π/mtheta)
+    R_periodic = vcat(R, R[1])
+    Z_periodic = vcat(Z, Z[1])
+    dx_dtheta = periodic_cubic_deriv(θ_grid_periodic, R_periodic)
+    dz_dtheta = periodic_cubic_deriv(θ_grid_periodic, Z_periodic)
 
-    # Precompute Fourier transform terms, sin(lθ - nν) and cos(lθ - nν)
-    sin_ln_basis = zeros(Float64, mtheta, mpert)
-    cos_ln_basis = zeros(Float64, mtheta, mpert)
-    for j in 1:mpert
-        for i in 1:mtheta
-            l = mlow + j - 1
-            cos_ln_basis[i, j] = cos(l * θ_grid[i] - n * ν[i])
-            sin_ln_basis[i, j] = sin(l * θ_grid[i] - n * ν[i])
-        end
-    end
+    # Precompute Fourier transform terms, sin(mθ - nν) and cos(mθ - nν)
+    sin_mn_basis = sin.((mlow .+ (0:(mpert-1))') .* θ_grid .- n .* ν)
+    cos_mn_basis = cos.((mlow .+ (0:(mpert-1))') .* θ_grid .- n .* ν)
 
     return PlasmaGeometry(
-        x_plasma,
-        z_plasma,
+        R,
+        Z,
+        ν,
         dx_dtheta,
         dz_dtheta,
-        sin_ln_basis,
-        cos_ln_basis
+        sin_mn_basis,
+        cos_mn_basis
+    )
+end
+
+"""
+    PlasmaGeometry3D
+
+3D toroidal surface geometry for vacuum boundary integral calculations.
+
+Built by toroidally extruding a 2D poloidal contour (`PlasmaGeometry`) and computing
+Cartesian coordinates, tangent vectors, normals, and differential area elements. Note
+that the gradient/area elements are scaled by dθ and dζ.
+
+# Fields
+
+  - `mtheta::Int`: Number of poloidal grid points
+  - `nzeta::Int`: Number of toroidal grid points
+  - `num_gridpoints::Int`: Total number of surface grid points (mtheta * nzeta)
+  - `r::Matrix{Float64}`: Surface points in Cartesian (X,Y,Z), shape (num_gridpoints, 3)
+  - `dr_dθ::Matrix{Float64}`: Poloidal tangent vector ∂r/∂θ × dθ, shape (num_gridpoints, 3)
+  - `dr_dζ::Matrix{Float64}`: Toroidal tangent vector ∂r/∂ζ × dζ, shape (num_gridpoints, 3)
+  - `normal::Matrix{Float64}`: Oriented normal vectors, shape (num_gridpoints, 3)
+  - `sin_mn_basis3D::Matrix{Float64}`: sin(mθ - nν - nϕ) basis functions at plasma surface
+  - `cos_mn_basis3D::Matrix{Float64}`: cos(mθ - nν - nϕ) basis functions at plasma surface
+  - `aspect_ratio::Float64`: Ratio of max to min grid spacing for anisotropy analysis
+  - `normal_orient::Int`: Forces normals to face out from vacuum region (+1 or -1)
+"""
+@kwdef struct PlasmaGeometry3D
+    mtheta::Int = 1
+    nzeta::Int = 1
+    r::Matrix{Float64} = zeros(1, 3)
+    dr_dθ::Matrix{Float64} = zeros(1, 3)
+    dr_dζ::Matrix{Float64} = zeros(1, 3)
+    normal::Matrix{Float64} = zeros(1, 3)
+    sin_mn_basis3D::Matrix{Float64} = zeros(1, 1)
+    cos_mn_basis3D::Matrix{Float64} = zeros(1, 1)
+    aspect_ratio::Float64 = 1.0
+    normal_orient::Int = 1
+end
+
+"""
+    PlasmaGeometry3D(plasma_2d::PlasmaGeometry, nzeta::Int)
+
+Construct a 3D axisymmetric toroidal surface from a 2D poloidal contour.
+
+# Algorithm
+
+ 0. Interpolate 2D arrays onto mtheta grid
+ 1. Map 2D (R, Z, ν) to 3D Cartesian: X = R cos(ϕ+ν), Y = R sin(ϕ+ν), Z = Z
+ 2. Fit periodic bicubic splines to (X, Y, Z) on (θ, ϕ) grid
+ 3. Compute tangent vectors via spline gradients
+ 4. Compute normals via cross product: n = ∂r/∂θ × ∂r/∂ζ
+
+# Arguments
+
+  - `plasma_2d`: 2D poloidal plasma geometry
+  - `nzeta`: Number of toroidal grid points
+
+# Returns
+
+  - `PlasmaGeometry3D`: Complete 3D surface description
+"""
+function PlasmaGeometry3D(inputs::VacuumInput3D)
+
+    # Extract 2D poloidal data
+    (; mtheta, nzeta, npert, nlow, mlow, mpert) = inputs
+    num_points = mtheta * nzeta
+    dθ = 2π / mtheta
+    dζ = 2π / nzeta
+    θ_grid = range(; start=0, length=mtheta, step=dθ)
+    ϕ_grid = range(; start=0, length=nzeta, step=dζ)
+
+    # Allocate output arrays
+    r = zeros(num_points, 3)
+    normal = zeros(num_points, 3)
+    dr_dθ = zeros(num_points, 3)
+    dr_dζ = zeros(num_points, 3)
+
+    # Interpolate arrays from input onto mtheta grid (same as 2D)
+    x = interp_to_new_grid(inputs.x, mtheta)
+    z = interp_to_new_grid(inputs.z, mtheta)
+    ν = interp_to_new_grid(inputs.ν, mtheta)
+
+    # Build 3D surface point-by-point from 2D contour
+    for (i, θ) in enumerate(θ_grid), (j, ϕ) in enumerate(ϕ_grid)
+        idx = i + (j - 1) * mtheta
+        r[idx, :] .= [x[i] * cos(ϕ), x[i] * sin(ϕ), z[i]]
+    end
+
+    # Create splines for each Cartesian component (X, Y, Z) with periodic boundary conditions
+    r_grid = reshape(r, mtheta, nzeta, 3)
+    itps = [cubic_spline_interpolation((θ_grid, ϕ_grid), r_grid[:, :, k]; bc=Periodic(OnGrid())) for k in 1:3]
+
+    # Compute tangent vectors, unit normals, and differential area elements via spline interpolation
+    for (i, θ) in enumerate(θ_grid), (j, ϕ) in enumerate(ϕ_grid)
+        idx = i + (j - 1) * mtheta
+        # Compute gradients directly to avoid list comprehension allocation
+        for k in 1:3
+            g = Interpolations.gradient(itps[k], θ, ϕ)
+            dr_dθ[idx, k] = g[1]
+            dr_dζ[idx, k] = g[2]
+        end
+        normal[idx, :] = cross(dr_dθ[idx, :], dr_dζ[idx, :])
+    end
+
+    # Determine normal orientation (inward for plasma) and enforce it
+    idx = argmax(view(r, :, 1)) # outboard midplane
+    normal_orient = normal[idx, 1] < 0 ? 1 : -1
+    normal .*= normal_orient
+
+    # Warn if grid spacing is highly anisotropic
+    spacing_θ = sqrt(sum(abs2, dr_dθ) / size(dr_dθ, 1)) * dθ
+    spacing_ζ = sqrt(sum(abs2, dr_dζ) / size(dr_dζ, 1)) * dζ
+    aspect_ratio = spacing_ζ / spacing_θ
+    @info "Average grid spacing [m]: dθ=$(round(spacing_θ, digits=4)), dζ=$(round(spacing_ζ, digits=4)), aspect ratio=$(round(aspect_ratio, digits=2))"
+    aspect_ratio > 10.0 && @warn "Grid aspect ratio is highly anisotropic, which may degrade quadrature accuracy"
+
+    # Precompute Fourier transform terms, sin(lθ - nν(θ) - nϕ) and cos(lθ - nν(θ) - nϕ)
+    sin_mn_basis3D = zeros(num_points, mpert*npert)
+    cos_mn_basis3D = zeros(num_points, mpert*npert)
+    for idx_n in 1:npert, idx_m in 1:mpert
+        n = nlow + idx_n - 1
+        m = mlow + idx_m - 1
+        for (j, ϕ) in enumerate(ϕ_grid), (i, θ) in enumerate(θ_grid)
+            cos_mn_basis3D[i+(j-1)*mtheta, idx_m+(idx_n-1)*mpert] = cos(m * θ - n * (ν[i] + ϕ))
+            sin_mn_basis3D[i+(j-1)*mtheta, idx_m+(idx_n-1)*mpert] = sin(m * θ - n * (ν[i] + ϕ))
+        end
+    end
+
+    return PlasmaGeometry3D(;
+        mtheta,
+        nzeta,
+        r,
+        dr_dθ,
+        dr_dζ,
+        normal,
+        sin_mn_basis3D,
+        cos_mn_basis3D,
+        aspect_ratio,
+        normal_orient
     )
 end
 
 """
-    initialize_wall(inputs::VacuumInput, plasma_surf::PlasmaGeometry, wall_settings::WallShapeSettings) -> WallGeometry
+    WallGeometry
+
+Struct holding wall geometry data for vacuum calculations. Arrays are of length
+`mtheta`, where `mtheta` is the number of poloidal grid points and θ ∈ [0, 1).
+
+# Fields
+
+  - `nowall::Bool`: Boolean flag indicating if there is no wall
+  - `is_closed_toroidal::Bool`: Boolean flag indicating if the wall is a closed toroidal surface
+  - `x::Vector{Float64}`: Wall R-coordinates
+  - `z::Vector{Float64}`: Wall Z-coordinates
+  - `dx_dtheta::Vector{Float64}`: Derivative dR/dθ at wall
+  - `dz_dtheta::Vector{Float64}`: Derivative dZ/dθ at wall
+"""
+@kwdef struct WallGeometry
+    nowall::Bool = true
+    is_closed_toroidal::Bool = true
+    x::Vector{Float64} = Float64[]
+    z::Vector{Float64} = Float64[]
+    dx_dtheta::Vector{Float64} = Float64[]
+    dz_dtheta::Vector{Float64} = Float64[]
+end
+
+"""
+    WallGeometry(inputs::VacuumInput, plasma_surf::PlasmaGeometry, wall_settings::WallShapeSettings)
 
-Initialize the wall geometry based on the provided vacuum inputs and wall shape settings.
+Contructor to initialize the wall geometry based on the provided vacuum inputs and wall shape settings.
 
 This performs functionality similar to portions of the `arrays` function in the original
 Fortran VACUUM code. It returns a `WallGeometry` struct containing the necessary wall
@@ -201,108 +395,99 @@ surface data for vacuum calculations.
   - Supports multiple wall shapes: nowall, conformal, elliptical, dee, mod_dee, from_file
   - Optionally redistributes wall points to equal arc length spacing if `equal_arc_wall=true`
 """
-function initialize_wall(inputs::VacuumInput, plasma_surf::PlasmaGeometry, wall_settings::WallShapeSettings)
+function WallGeometry(inputs::VacuumInput, plasma_surf::PlasmaGeometry, wall_settings::WallShapeSettings)
 
     # Basic wall flags
     nowall = wall_settings.shape == "nowall"
     is_closed_toroidal = true
 
-    # All of these arrays are of length mtheta with θ = [0, 1)
+    # Output wall coordinate arrays
     mtheta = inputs.mtheta
+    x_wall = zeros(mtheta)
+    z_wall = zeros(mtheta)
+    dx_dtheta = zeros(mtheta)
+    dz_dtheta = zeros(mtheta)
+    θ_grid = range(; start=0, length=mtheta, step=2π/mtheta)
 
-    # Get wall shape from form_wall
-    # Plasma surface coordinates
+    if nowall
+        @info "Using no wall"
+        return WallGeometry(;
+            nowall=nowall,
+            is_closed_toroidal=is_closed_toroidal,
+            x=x_wall,
+            z=z_wall,
+            dx_dtheta=dx_dtheta,
+            dz_dtheta=dz_dtheta
+        )
+    end
+
+    # Compute plasma surface quantities
     x_plasma = plasma_surf.x
     z_plasma = plasma_surf.z
-
-    # Output wall coordinate arrays
-    x_wall = zeros(Float64, mtheta)
-    z_wall = zeros(Float64, mtheta)
-
-    # Common geometric parameters
     xmin = minimum(x_plasma)
     xmax = maximum(x_plasma)
     zmin = minimum(z_plasma)
     zmax = maximum(z_plasma)
-
     r_minor = 0.5 * (xmax - xmin)
     r_major = 0.5 * (xmax + xmin)
 
     # Destructuring settings for readability
     (; aw, bw, cw, dw, tw, a) = wall_settings
-    wcentr = 0.0 # Initialize
 
-    if wall_settings.shape == "nowall"
-        @info "Using no wall"
-    elseif wall_settings.shape == "conformal"
+    if wall_settings.shape == "conformal"
         dx = a * r_minor
         @info "Calculating conformal wall shape $((@sprintf "%.2e" dx)) m from plasma surface."
-        wcentr = r_major
         centerstack_min = min(0.1, 0.1 * minimum(x_plasma))  # Avoid wall crossing R=0 axis
         for i in 1:mtheta
-            j = mod1(i - 1, mtheta)
-            k = mod1(i + 1, mtheta)
-            # Normal vector calculation
-            alph = atan(x_plasma[k] - x_plasma[j], z_plasma[j] - z_plasma[k])
+            prev = mod1(i - 1, mtheta)
+            next = mod1(i + 1, mtheta)
+            # Approximate local tangent t = (dx, dz) using centered finite differences, t ≈ (dx, dz)
+            # Then, extend in normal direction, n = (-dz, dx)
+            alph = -atan(x_plasma[next] - x_plasma[prev], z_plasma[next] - z_plasma[prev])
             x_wall[i] = max(centerstack_min, x_plasma[i] + a * r_minor * cos(alph))
             z_wall[i] = z_plasma[i] + a * r_minor * sin(alph)
         end
-
         if any(x_wall .<= centerstack_min + eps(Float64))
             @warn "Conformal wall with a=$a would cross R=0 axis; forcing minimum wall R to $(@sprintf "%.2e" centerstack_min) m to avoid unphysical geometry."
         end
-
     elseif wall_settings.shape == "elliptical"
+        # TODO: need to verify I fixed these walls shapes for CCW correctly
         @info "Calculating elliptical wall shape with a = $((@sprintf "%.2e" a)) m."
-        wcentr = r_major
-
         zrad = 0.5 * (zmax - zmin)
         zh = sqrt(abs(zrad^2 - r_minor^2))
         zmuw = log((a/zh) + sqrt((a/zh)^2 + 1))
         bw_eff = (zh * cosh(zmuw)) / a
-
-        for i in 1:mtheta
-            the = (i - 1) * (2π / mtheta)
-            x_wall[i] = r_major + a * cos(the)
-            z_wall[i] = -bw_eff * a * sin(the)
+        for (i, θ) in enumerate(θ_grid)
+            x_wall[i] = r_major + a * cos(θ)
+            z_wall[i] = bw_eff * a * sin(θ)
         end
-
     elseif wall_settings.shape == "dee"
         wcentr = r_major + cw * r_minor
         @info "Calculating dee-shaped wall with R = $((@sprintf "%.2e" wcentr)) + $((@sprintf "%.2e" r_minor)) * (1.0 + $((@sprintf "%.2e" a)) - $((@sprintf "%.2e" cw))) * cos(θ + $((@sprintf "%.2e" dw)) * sin(θ)), Z = -$((@sprintf "%.2e" bw)) * $((@sprintf "%.2e" r_minor)) * (1.0 + $((@sprintf "%.2e" a)) - $((@sprintf "%.2e" cw))) * sin(θ + $((@sprintf "%.2e" tw)) * sin(2θ)) - $((@sprintf "%.2e" aw)) * $((@sprintf "%.2e" r_minor)) * sin(2θ)."
-        for i in 1:mtheta
-            the = (i - 1) * (2π / mtheta)
-            x_wall[i] = wcentr + r_minor * (1.0 + a - cw) * cos(the + dw * sin(the))
-            z_wall[i] = -bw * r_minor * (1.0 + a - cw) * sin(the + tw * sin(2.0*the)) - aw * r_minor * sin(2.0*the)
+        for (i, θ) in enumerate(θ_grid)
+            x_wall[i] = wcentr + r_minor * (1.0 + a - cw) * cos(θ + dw * sin(θ))
+            z_wall[i] = bw * r_minor * (1 + a - cw) * sin(θ + tw * sin(2 * θ)) - aw * r_minor * sin(2 * θ)
         end
-
     elseif wall_settings.shape == "mod_dee"
         @info "Calculating modified dee-shaped wall with R = $((@sprintf "%.2e" cw)) + $((@sprintf "%.2e" a)) * cos(θ + $((@sprintf "%.2e" dw)) * sin(θ)), Z = -$((@sprintf "%.2e" bw)) * $((@sprintf "%.2e" a)) * sin(θ + $((@sprintf "%.2e" tw)) * sin(2θ)) - $((@sprintf "%.2e" aw)) * sin(2θ)."
-        wcentr = cw
-        for i in 1:mtheta
-            the = (i - 1) * (2π / mtheta)
-            x_wall[i] = cw + a * cos(the + dw * sin(the))
-            z_wall[i] = -bw * a * sin(the + tw * sin(2.0*the)) - aw * sin(2.0*the)
+        for (i, θ) in enumerate(θ_grid)
+            x_wall[i] = cw + a * cos(θ + dw * sin(θ))
+            z_wall[i] = bw * a * sin(θ + tw * sin(2 * θ)) - aw * sin(2 * θ)
         end
-
     else
         filepath = wall_settings.shape
         !isfile(filepath) && @error "ERROR: Wall geometry file $filepath does not exist.
             Please set the wall shape parameter to a valid file path or a built-in shape (nowall, conformal, elliptical, dee, mod_dee)."
-
-        wcentr = 0.0
         open(wall_settings.shape, "r") do io
             npots0 = parse(Int, readline(io))  # Number of points in file
-            wcentr = parse(Float64, readline(io))
+            readline(io) # Skip wcentr line
             readline(io) # Skip header/comment line
-
             (npots0 < mtheta) && @error "ERROR: $filename contains fewer points ($npots0) than mtheta ($mtheta)."
-
             for i in 1:mtheta
                 line = split(readline(io))
                 # Assumes file format: [index  R_coord  Z_coord]
-                x_wall[i] = parse(Float64, line[2])
-                z_wall[i] = parse(Float64, line[3])
+                x_wall[i] = parse(line[2])
+                z_wall[i] = parse(line[3])
             end
         end
     end
@@ -310,9 +495,7 @@ function initialize_wall(inputs::VacuumInput, plasma_surf::PlasmaGeometry, wall_
     # Optional: Re-parameterization
     if wall_settings.equal_arc_wall && (wall_settings.shape != "nowall")
         @info "Re-distributing wall points to equal arc length spacing"
-        if !is_closed_toroidal
-            @error "Wall is not closed toroidally; equal arc length distribution assumes periodicity as cannot be safely used."
-        end
+        !is_closed_toroidal && error("Wall is not closed toroidally; equal arc length distribution assumes periodicity as cannot be safely used.")
         x_wall, z_wall, _, theta_grid, _ = distribute_to_equal_arc_grid(x_wall, z_wall, mtheta)
         theta_grid .= theta_grid .* (2π)  # Scale to [0, 2π) - irregular spacing
         fx_of_theta = interpolate((theta_grid,), x_wall, Gridded(Linear()))
@@ -321,22 +504,207 @@ function initialize_wall(inputs::VacuumInput, plasma_surf::PlasmaGeometry, wall_
         dz_dtheta = only.(Interpolations.gradient.(Ref(fz_of_theta), theta_grid))
     else
         # used regular theta grid spacing to build wall
-        theta_grid = range(0; stop=2π, length=mtheta + 1)[1:(end-1)] # length mtheta without endpoint
-        dx_dtheta = periodic_cubic_deriv(theta_grid, x_wall)
-        dz_dtheta = periodic_cubic_deriv(theta_grid, z_wall)
+        dx_dtheta = periodic_cubic_deriv(θ_grid, x_wall)
+        dz_dtheta = periodic_cubic_deriv(θ_grid, z_wall)
     end
 
-    if any(x_wall .<= 0.0) && !nowall
-        # to add support for x<0 walls, be sure to carefully replicate Chance's fortran code x<0 handling in the kernel function to account for the additional singularities associated with this
-        error("Wall R-coordinates contain non-physical values (R <= 0). Check wall geometry.")
+    # to add support for x<0 walls, be sure to carefully replicate Chance's fortran code x<0 handling in the kernel function to account for the additional singularities associated with this
+    any(x_wall .<= 0.0) && error("Wall R-coordinates contain non-physical values (R <= 0). Check wall geometry.")
+
+    return WallGeometry(;
+        nowall=nowall,
+        is_closed_toroidal=is_closed_toroidal,
+        x=x_wall,
+        z=z_wall,
+        dx_dtheta=dx_dtheta,
+        dz_dtheta=dz_dtheta
+    )
+end
+
+"""
+    WallGeometry3D
+
+Struct holding wall geometry data for vacuum calculations. Arrays are of length
+`mtheta`, where `mtheta` is the number of poloidal grid points and θ ∈ [0, 1).
+
+# Fields
+
+  - `nowall::Bool`: Boolean flag indicating if there is no wall
+  - `is_closed_toroidal::Bool`: Boolean flag indicating if the wall is a closed toroidal surface
+  - `mtheta::Int`: Number of poloidal grid points
+  - `nzeta::Int`: Number of toroidal grid points
+  - `r::Matrix{Float64}`: (x, y, z) wall coordinates at each grid point
+  - `dr_dθ::Matrix{Float64}`: Derivative dR/dθ at wall
+  - `dr_dζ::Matrix{Float64}`: Derivative dR/dζ at wall
+  - `normal::Matrix{Float64}`: Outward normal vectors at wall
+"""
+@kwdef struct WallGeometry3D
+    nowall::Bool = true
+    is_closed_toroidal::Bool = true
+    mtheta::Int = 1
+    nzeta::Int = 1
+    r::Matrix{Float64} = zeros(1, 3)
+    dr_dθ::Matrix{Float64} = zeros(1, 3)
+    dr_dζ::Matrix{Float64} = zeros(1, 3)
+    normal::Matrix{Float64} = zeros(1, 3)
+    normal_orient::Int = 1
+end
+
+"""
+    WallGeometry3D(inputs::VacuumInput3D, plasma_surf::PlasmaGeometry3D, wall_settings::WallShapeSettings)
+
+Contructor to initialize the 3D wall geometry based on the provided vacuum inputs and wall shape settings.
+Currently only works for axisymmetric walls generated by toroidal extrusion of 2D poloidal contours.
+
+# Arguments
+
+  - `inputs::VacuumInput3D`: Struct containing vacuum calculation parameters
+  - `plasma_surf::PlasmaGeometry3D`: Struct with plasma surface geometry (used for reference)
+  - `wall_settings::WallShapeSettings`: Struct specifying wall shape and parameters
+
+# Returns
+
+  - `WallGeometry`: Struct containing wall surface coordinates and derivatives
+
+# Notes
+
+  - Supports multiple wall shapes: nowall, conformal, elliptical, dee, mod_dee, from_file
+  - Optionally redistributes wall points to equal arc length spacing if `equal_arc_wall=true`
+"""
+function WallGeometry3D(inputs::VacuumInput3D, plasma_surf::PlasmaGeometry3D, wall_settings::WallShapeSettings)
+
+    # Basic wall flags
+    nowall = wall_settings.shape == "nowall"
+    is_closed_toroidal = true
+
+    (; mtheta, nzeta) = inputs
+    dθ = 2π / mtheta
+    dζ = 2π / nzeta
+    θ_grid = range(; start=0, length=mtheta, step=dθ)
+    ϕ_grid = range(; start=0, length=nzeta, step=dζ)
+    num_points = mtheta * nzeta
+
+    # Output wall coordinate arrays
+    r = zeros(num_points, 3)
+    normal = zeros(num_points, 3)
+    dr_dθ = zeros(num_points, 3)
+    dr_dζ = zeros(num_points, 3)
+    normal_orient = 1
+
+    if nowall
+        @info "Using no wall"
+        return WallGeometry3D(
+            nowall,
+            is_closed_toroidal,
+            mtheta,
+            nzeta,
+            r,
+            dr_dθ,
+            dr_dζ,
+            normal,
+            normal_orient
+        )
     end
 
-    return WallGeometry(
+    # Plasma surface coordinates (2D)
+    x_plasma = plasma_surf.r[1:plasma_surf.mtheta, 1]
+    z_plasma = plasma_surf.r[1:plasma_surf.mtheta, 3]
+    xmin = minimum(x_plasma)
+    xmax = maximum(x_plasma)
+    zmin = minimum(z_plasma)
+    zmax = maximum(z_plasma)
+    r_minor = 0.5 * (xmax - xmin)
+    r_major = 0.5 * (xmax + xmin)
+
+    # Destructuring settings for readability
+    (; aw, bw, cw, dw, tw, a) = wall_settings
+
+    if wall_settings.shape == "conformal"
+        dx = a * r_minor
+        @info "Calculating conformal wall shape $((@sprintf "%.2e" dx)) m from plasma surface."
+        centerstack_min = min(0.1, 0.1 * minimum(x_plasma))
+        for (j, ϕ) in enumerate(ϕ_grid), i in 1:mtheta
+            idx = i + (j - 1) * mtheta
+            prev = mod1(i - 1, mtheta)
+            next = mod1(i + 1, mtheta)
+            # Approximate local tangent t = (dx, dz) using centered finite differences, t ≈ (dx, dz)
+            # Then, extend in normal direction, n = (-dz, dx)
+            alph = -atan(x_plasma[next] - x_plasma[prev], z_plasma[next] - z_plasma[prev])
+            R_wall = max(centerstack_min, x_plasma[i] + a * r_minor * cos(alph))
+            Z_wall = z_plasma[i] + a * r_minor * sin(alph)
+            # Map to Cartesian (X, Y, Z)
+            r[idx, :] .= [R_wall * cos(ϕ), R_wall * sin(ϕ), Z_wall]
+        end
+
+        if any(sqrt.(r[:, 1] .^ 2 .+ r[:, 2] .^ 2) .<= centerstack_min + eps(Float64))
+            @warn "Conformal wall with a=$a would cross R=0 axis; forcing minimum wall R to $(@sprintf "%.2e" centerstack_min) m to avoid unphysical geometry."
+        end
+    elseif wall_settings.shape == "elliptical"
+        @info "Calculating elliptical wall shape with a = $((@sprintf "%.2e" a)) m."
+        zrad = 0.5 * (zmax - zmin)
+        zh = sqrt(abs(zrad^2 - r_minor^2))
+        zmuw = log((a/zh) + sqrt((a/zh)^2 + 1))
+        bw_eff = (zh * cosh(zmuw)) / a
+        for (j, ϕ) in enumerate(ϕ_grid), (i, θ) in enumerate(θ_grid)
+            idx = i + (j - 1) * mtheta
+            r[idx, :] .= [(r_major + a * cos(θ)) * cos(ϕ), (r_major + a * cos(θ)) * sin(ϕ), bw_eff * a * sin(θ)]
+        end
+    elseif wall_settings.shape == "dee"
+        error("Dee-shaped walls not yet implemented for 3D walls.")
+    elseif wall_settings.shape == "mod_dee"
+        error("Modified Dee-shaped walls not yet implemented for 3D walls.")
+    else
+        filepath = wall_settings.shape
+        !isfile(filepath) && error("ERROR: Wall geometry file $filepath does not exist.
+            Please set the wall shape parameter to a valid file path or a built-in shape (nowall, conformal, elliptical, dee, mod_dee).")
+
+        open(filepath, "r") do io
+            npots0 = parse(Int, readline(io))
+            (npots0 != num_points) && error("ERROR: $filepath contains different points ($npots0) than mtheta * nzeta ($num_points).")
+            # TODO: add an interpolation here for if they're different
+            for i in 1:num_points
+                line = split(readline(io))
+                r[i, 1] = parse(Float64, line[1])
+                r[i, 2] = parse(Float64, line[2])
+                r[i, 3] = parse(Float64, line[3])
+            end
+        end
+    end
+
+    # Optional: Re-parameterization
+    if wall_settings.equal_arc_wall && (wall_settings.shape != "nowall")
+        error("Re-distributing wall points to equal arc length spacing not implemented for 3D walls yet.")
+    end
+
+    # Create splines for each Cartesian component (X, Y, Z) with periodic boundary conditions
+    r_grid = reshape(r, mtheta, nzeta, 3)
+    itps = [cubic_spline_interpolation((θ_grid, ϕ_grid), r_grid[:, :, k]; bc=Periodic(OnGrid())) for k in 1:3]
+
+    # Compute tangent vectors, normals, and differential area elements
+    for (i, θ) in enumerate(θ_grid), (j, ϕ) in enumerate(ϕ_grid)
+        idx = i + (j - 1) * mtheta
+        for k in 1:3
+            g = Interpolations.gradient(itps[k], θ, ϕ)
+            dr_dθ[idx, k] = g[1]
+            dr_dζ[idx, k] = g[2]
+        end
+        normal[idx, :] = cross(dr_dθ[idx, :], dr_dζ[idx, :])
+    end
+
+    # Determine normal orientation (outward for wall) and enforce it
+    idx = argmax(view(r, :, 1)) # outboard midplane
+    normal_orient = normal[idx, 1] > 0 ? 1 : -1
+    @views normal .*= normal_orient
+
+    return WallGeometry3D(
         nowall,
         is_closed_toroidal,
-        x_wall,
-        z_wall,
-        dx_dtheta,
-        dz_dtheta
+        mtheta,
+        nzeta,
+        r,
+        dr_dθ,
+        dr_dζ,
+        normal,
+        normal_orient
     )
 end
diff --git a/src/Vacuum/Kernel2D.jl b/src/Vacuum/Kernel2D.jl
index f2882fd0..becc8386 100644
--- a/src/Vacuum/Kernel2D.jl
+++ b/src/Vacuum/Kernel2D.jl
@@ -1,10 +1,3 @@
-# Gaussian quadrature weights and points for 8-point integration (used for kernel! function)
-const GAUSSIANWEIGHTS = [0.101228536290376, 0.222381034453374, 0.313706645877887, 0.362683783378362,
-    0.362683783378362, 0.313706645877887, 0.222381034453374, 0.101228536290376]
-
-const GAUSSIANPOINTS = [-0.960289856497536, -0.796666477413627, -0.525532409916329, -0.183434642495650,
-    0.183434642495650, 0.525532409916329, 0.796666477413627, 0.960289856497536]
-
 # 32-point Gaussian quadrature abscissae (used for Pn_minus_half_2007 function when nρ̂>0.1)
 const GAUSSIANWEIGHTS32 = [
     0.007018610009470096600, 0.016274394730905670605,
@@ -44,8 +37,56 @@ const GAUSSIANPOINTS32 = [
     0.985611511545268335400, 0.997263861849481563545
 ]
 
+# Cache for Lagrange stencils keyed by Gaussian order
+const LAGRANGE_STENCIL_CACHE = Dict{Int,Tuple{Vector{SVector{5,Float64}},Vector{SVector{5,Float64}}}}()
+
+"""
+    precompute_lagrange_stencils(gaussian_points)
+
+Precompute 5-point Lagrange interpolation stencils for Gaussian quadrature points.
+
+Returns a tuple `(left, right)` where each entry is a Vector of SVector{5,Float64}
+containing the stencil weights for points on the left/right panel.
+"""
+function precompute_lagrange_stencils(gaussian_points::AbstractVector{<:Real})
+    stencil_points = SVector(-2, -1, 0, 1, 2)
+    npts = length(gaussian_points)
+    left = Vector{SVector{5,Float64}}(undef, npts)
+    right = Vector{SVector{5,Float64}}(undef, npts)
+
+    for ig in 1:npts
+        p_left = -1.0 + gaussian_points[ig]
+        p_right = 1.0 + gaussian_points[ig]
+
+        left[ig] = ntuple(5) do i
+            xi = stencil_points[i]
+            prod(j -> j == i ? 1.0 : (p_left - stencil_points[j]) / (xi - stencil_points[j]), 1:5)
+        end |> SVector
+
+        right[ig] = ntuple(5) do i
+            xi = stencil_points[i]
+            prod(j -> j == i ? 1.0 : (p_right - stencil_points[j]) / (xi - stencil_points[j]), 1:5)
+        end |> SVector
+    end
+
+    return left, right
+end
+
+"""
+    get_lagrange_stencils(gaussian_points)
+
+Return cached 5-point Lagrange stencils keyed by Gaussian order. Initializes
+and caches the stencils on first use for a given order.
 """
-    kernel!(grad_greenfunction_mat, greenfunction_mat, x_obspoints, z_obspoints, x_sourcepoints, z_sourcepoints, j1, j2, isgn, iops, inputs; xwall=nothing, zwall=nothing)
+function get_lagrange_stencils(gaussian_points::AbstractVector{<:Real})
+    order = length(gaussian_points)
+    return get!(LAGRANGE_STENCIL_CACHE, order) do
+        precompute_lagrange_stencils(gaussian_points)
+    end
+end
+
+"""
+    kernel!(grad_greenfunction, greenfunction, observer, source, n)
 
 Compute kernels of integral equation for Laplace's equation in a torus.
 
@@ -54,20 +95,17 @@ The residue calculation needs to be updated for open walls.**
 
 # Arguments
 
-  - `grad_greenfunction_mat`: Gradient Green's function matrix (output)
-  - `greenfunction_mat`: Green's function matrix (output)
-  - `x_obspoints`: Observer x coordinates (R coordinates)
-  - `z_obspoints`: Observer z coordinates (Z coordinates)
-  - `x_sourcepoints`: Source x coordinates (R coordinates)
-  - `z_sourcepoints`: Source z coordinates (Z coordinates)
-  - `j1/j2`: Block index for observer/source (1=plasma, 2=wall)
+  - `grad_greenfunction`: Gradient Green's function matrix (output)
+  - `greenfunction`: Green's function matrix (output)
+  - `observer`: Observer geometry struct (PlasmaGeometry or WallGeometry)
+  - `source`: Source geometry struct (PlasmaGeometry or WallGeometry)
   - `n`: Toroidal mode number
 
 # Returns
 
-Modifies `grad_greenfunction_mat` and `greenfunction_mat` in place.
-Note that greenfunction_mat is zeroed each time this function is called,
-but grad_greenfunction_mat is not since it fills a different block of the
+Modifies `grad_greenfunction` and `greenfunction` in place.
+Note that greenfunction is zeroed each time this function is called,
+but grad_greenfunction is not since it fills a different block of the
 (2 * mtheta, 2 * mtheta) depending on the source/observer.
 
 # Notes
@@ -77,152 +115,142 @@ but grad_greenfunction_mat is not since it fills a different block of the
   - Implements analytical singularity removal following Chance 1997
 """
 function kernel!(
-    grad_greenfunction_mat::Matrix{Float64},
-    greenfunction_mat::Matrix{Float64},
-    x_obspoints::Vector{Float64},
-    z_obspoints::Vector{Float64},
-    x_sourcepoints::Vector{Float64},
-    z_sourcepoints::Vector{Float64},
-    j1::Int,
-    j2::Int,
-    n::Int
+    grad_greenfunction::Matrix{Float64},
+    greenfunction::Matrix{Float64},
+    observer::Union{PlasmaGeometry,WallGeometry},
+    source::Union{PlasmaGeometry,WallGeometry},
+    n::Int;
+    GAUSS_ORDER::Int=8
 )
 
-    # These used to be function arguments, but can just set inside here based on j1/j2
-    plasma_plasma_block = j1 == 1 && j2 == 1 # previously iops
-    plasma_is_source = j2 == 1 # previously iopw
-    isgn = plasma_is_source ? -1 : 1
-
-    mtheta = length(x_obspoints)
+    mtheta = length(observer.x)
     dtheta = 2π / mtheta
     theta_grid = range(; start=0, length=mtheta, step=dtheta)
 
-    # Zero out greenfunction_mat at start of each kernel call (matches Fortran behavior)
-    fill!(greenfunction_mat, 0.0)
+    # Take a view of the corresponding block of the grad_greenfunction
+    col_index = (source isa PlasmaGeometry ? 1 : 2)
+    row_index = (observer isa PlasmaGeometry ? 1 : 2)
+    grad_greenfunction_block = view(
+        grad_greenfunction,
+        ((row_index-1)*mtheta+1):(row_index*mtheta),
+        ((col_index-1)*mtheta+1):(col_index*mtheta)
+    )
 
-    if mtheta != length(z_obspoints) || mtheta != length(x_sourcepoints) || mtheta != length(z_sourcepoints)
-        error("Length of input arrays (xobs, zobs, xsource, zsce) are different. All length should be the same")
-    end
+    # Zero out greenfunction at start of each kernel call
+    fill!(greenfunction, 0.0)
+    # 𝒢ⁿ only needed for plasma as source term (RHS of eqs. 26/27 in Chance 1997)
+    populate_greenfunction = source isa PlasmaGeometry
 
     # S₁ᵢ in Chance 1997, eq.(78)
     log_correction_0=16.0*dtheta*(log(2*dtheta)-68.0/15.0)/15.0
     log_correction_1=128.0*dtheta*(log(2*dtheta)-8.0/15.0)/45.0
     log_correction_2=4.0*dtheta*(7.0*log(2*dtheta)-11.0/15.0)/45.0
-
-    # Used for Z'_θ and X'_θ in eq.(51)
-    spline_x = cubic_spline_interpolation(theta_grid, x_sourcepoints; extrapolation_bc=Interpolations.Periodic())
-    spline_z = cubic_spline_interpolation(theta_grid, z_sourcepoints; extrapolation_bc=Interpolations.Periodic())
-    dx_dtheta = [Interpolations.gradient(spline_x, t)[1] for t in theta_grid]
-    dz_dtheta = [Interpolations.gradient(spline_z, t)[1] for t in theta_grid]
+    log_correction = [log_correction_2, log_correction_1, log_correction_0, log_correction_1, log_correction_2]
+
+    # Precompute composite Simpson's 1/3 rule weights, excluding singular points
+    # Note we set to 4 for even/2 for odd since we index from 1 while the formula assumes indexing from 0
+    nsrc = mtheta - 3
+    simpson_weights = dtheta / 3 .* [(k == 1 || k == nsrc) ? 1 : (iseven(k) ? 4 : 2) for k in 1:nsrc]
+
+    # Precompute quantities for Gaussian quadrature
+    GAUSSIANPOINTS, GAUSSIANWEIGHTS = FastGaussQuadrature.gausslegendre(GAUSS_ORDER) # on [-1, 1]
+    wgauss = GAUSSIANWEIGHTS .* dtheta # scale to [-Δθ, Δθ]
+    xgauss_left = -dtheta .+ GAUSSIANPOINTS .* dtheta # [-2Δθ, 0]
+    xgauss_right = dtheta .+ GAUSSIANPOINTS .* dtheta # [0, 2Δθ]
+    lagrange_left, lagrange_right = get_lagrange_stencils(GAUSSIANPOINTS)
+
+    # TODO: this isn't the same as the periodic_cubic_deriv interpolation?
+    # We need to interpolate off-grid during Gaussian quadrature
+    # THIS IS A BUG: extrapolations BC assumes both endpoints are included in the data, so this wraps at mtheta-1 to 0 instead of mtheta to 0
+    # The correct form is:
+    # theta_grid_periodic = range(; start=0, length=mtheta+1, step=dtheta)
+    # R_periodic = vcat(source.x, source.x[1])
+    # spline_x = cubic_spline_interpolation(theta_grid_periodic, R_periodic, bc = Periodic(OnGrid()); extrapolation_bc=Interpolations.Periodic())
+    # We can drop the extrapolation condition since we do mod(theta_gauss[ig], 2π) in the Gaussian quadrature loop below, or leave it here and drop that. They are redundant
+    spline_x = cubic_spline_interpolation(theta_grid, source.x; extrapolation_bc=Interpolations.Periodic())
+    spline_z = cubic_spline_interpolation(theta_grid, source.z; extrapolation_bc=Interpolations.Periodic())
 
     # Loop through observer points
     for j in 1:mtheta
-        # Initialize variables
-        x_obs=x_obspoints[j]
-        z_obs=z_obspoints[j]
-        theta_obs=theta_grid[j]
-        grad_green_0 = 0.0 # simpson integral for coupling_0 (𝒥 ∇'𝒢⁰∇'ℒ)
-        # Workspace = view of appropriate row of grad_greenfunction_mat for this observer point
-        grad_green_work = @view(grad_greenfunction_mat[(j1-1)*mtheta+j, (j2-1)*mtheta .+ (1:mtheta)])
+        # Get observer coordinates
+        x_obs, z_obs, theta_obs = observer.x[j], observer.z[j], theta_grid[j]
 
         # Obtain nonsingular region (endpoints at j+2 and j-2, so exclude j-1, j, and j+1)
-        nonsing_src_indices = mod1.((j+2):(j+mtheta-2), mtheta) # mod1 ensures isrc is in [1, mtheta]
-
-        # Compute composite Simpson's 1/3 rule weights (https://en.wikipedia.org/wiki/Simpson%27s_rule#Composite_Simpson's_1/3_rule)
-        # Note we set to 4 for even/2 for odd since we index from 1 while the formula assumes indexing from 0
-        nsrc = length(nonsing_src_indices)
-        simpson_weights = dtheta / 3 .* [(k == 1 || k == nsrc) ? 1 : (iseven(k) ? 4 : 2) for k in 1:nsrc]
+        nonsing_idx = mod1.((j+2):(j+mtheta-2), mtheta) # mod1 ensures isrc is in [1, mtheta]
 
         # Perform Simpson integration for nonsingular source points
-        for (isrc, wsimpson) in zip(nonsing_src_indices, simpson_weights)
-            x_source=x_sourcepoints[isrc]
-            z_source=z_sourcepoints[isrc]
-
-            # G_n is 2pi𝒢ⁿ; coupling_n is 𝒥 ∇'𝒢ⁿ∇'ℒ; coupling_0 is 𝒥 ∇'𝒢ⁿ∇'ℒ for n=0
-            G_n, coupling_n, coupling_0 = green(x_obs, z_obs, x_source, z_source, dx_dtheta[isrc], dz_dtheta[isrc], n)
+        for (isrc, wsimpson) in zip(nonsing_idx, simpson_weights)
+            G_n, gradG_n, gradG_0 = green(x_obs, z_obs, source.x[isrc], source.z[isrc], source.dx_dtheta[isrc], source.dz_dtheta[isrc], n)
 
             # Sum contributions to Green's function matrices using Simpson weight
-            grad_green_work[isrc] += isgn * coupling_n * wsimpson
-            greenfunction_mat[j, isrc] += G_n * wsimpson
-            grad_green_0 += coupling_0 * wsimpson
+            if populate_greenfunction
+                greenfunction[j, isrc] += G_n * wsimpson
+            end
+            grad_greenfunction_block[j, isrc] += gradG_n * wsimpson
+            # Subtract regular integral component of δⱼᵢK⁰ in eq. 83
+            grad_greenfunction_block[j, j] -= gradG_0 * wsimpson
         end
 
         # Perform Gaussian quadrature for singular points (source = obs point)
         # Get indices of the singularity region, [j-2, j-1, j, j+1, j+2]
-        js = mod.(j .+ ((mtheta-3):(mtheta+1)), mtheta) .+ 1
+        sing_idx = mod1.(j .+ ((mtheta-2):(mtheta+2)), mtheta)
         # Integrate region of length 2 * dtheta on left/right of singularity
         for region in ["left", "right"]
-            gauss_xleft = theta_obs - (region == "left" ? 2 * dtheta : 0)
-            gauss_xright = gauss_xleft + 2 * dtheta
-            gauss_xavg = (gauss_xright + gauss_xleft)/2
-            theta_gauss = gauss_xavg .+ GAUSSIANPOINTS .* dtheta # tgaus is 8 point gauss points, since GAUSSIANPOINTS is for only [-1,1]
-            for ig in 1:8 # 8-point Gaussian quadrature
+            # Get precomputed quadrature data
+            lagrange_stencil = (region == "left") ? lagrange_left : lagrange_right
+            theta_gauss = theta_obs .+ (region == "left" ? xgauss_left : xgauss_right)
+            for ig in 1:GAUSS_ORDER
                 # Compute green function for this Gaussian point
                 theta_gauss0 = mod(theta_gauss[ig], 2π)
                 x_gauss = spline_x(theta_gauss0)
                 dx_dtheta_gauss = Interpolations.gradient(spline_x, theta_gauss0)[1]
                 z_gauss = spline_z(theta_gauss0)
                 dz_dtheta_gauss = Interpolations.gradient(spline_z, theta_gauss0)[1]
-                G_n, coupling_n, coupling_0 = green(x_obs, z_obs, x_gauss, z_gauss, dx_dtheta_gauss, dz_dtheta_gauss, n)
-
-                # Add logarithm to G_n to analytically isolate the singularity (first type), Chance eq.(75)
-                G_n_nonsingular = plasma_plasma_block ? G_n + log((theta_obs-theta_gauss[ig])^2)/x_obs : G_n
-
-                # Redefine hardcoded Gaussian weights on the interval [-1, 1] to physical interval with length 2 * dtheta
-                wgauss = GAUSSIANWEIGHTS[ig] * dtheta
-                # Calculate p = θ/Δ = (θⱼ - θ')/Δ
-                pgauss=(theta_gauss[ig]-theta_obs)/dtheta
-                # Compute 5-point Lagrange basis polynomials at the Gauss point and multiply by quadrature weight
-                A0 = (pgauss^2-1)*(pgauss^2-4)/4.0 * wgauss
-                A1_plus = -(pgauss+1)*pgauss*(pgauss^2-4)/6.0 * wgauss
-                A1_minus = -(pgauss-1)*pgauss*(pgauss^2-4)/6.0 * wgauss
-                A2_plus = (pgauss^2-1)*pgauss*(pgauss+2)/24.0 * wgauss
-                A2_minus = (pgauss^2-1)*pgauss*(pgauss-2)/24.0 * wgauss
-
-                # First type of singularity: 𝒢ⁿ, occurs plasma as source only (see RHS of Chance eqs. 26/27)
-                if plasma_is_source
-                    greenfunction_mat[j, js[1]] += G_n_nonsingular * A2_minus
-                    greenfunction_mat[j, js[2]] += G_n_nonsingular * A1_minus
-                    greenfunction_mat[j, js[3]] += G_n_nonsingular * A0
-                    greenfunction_mat[j, js[4]] += G_n_nonsingular * A1_plus
-                    greenfunction_mat[j, js[5]] += G_n_nonsingular * A2_plus
+                G_n, gradG_n, gradG_0 = green(x_obs, z_obs, x_gauss, z_gauss, dx_dtheta_gauss, dz_dtheta_gauss, n)
+
+                # First type of singularity: 𝒢ⁿ (Eq. 75: 2π𝒢ⁿ + log(θ-θ')²/X')
+                if populate_greenfunction
+                    if observer isa PlasmaGeometry
+                        # Remove singular behavior by adding on leading-order term, Chance eq.(75)
+                        G_n += log((theta_obs - theta_gauss[ig])^2) / x_obs
+                    end
+                    @. @views greenfunction[j, sing_idx] += wgauss[ig] * G_n * lagrange_stencil[ig]
                 end
 
-                # Second type of singularity: 𝒦ⁿ
-                # Eq. 86: 𝒦ⁿαᵢ - δⱼᵢK⁰
-                grad_green_work[js[1]] += isgn * coupling_n * A2_minus
-                grad_green_work[js[2]] += isgn * coupling_n * A1_minus
-                grad_green_work[js[3]] += isgn * coupling_n * A0
-                grad_green_work[js[4]] += isgn * coupling_n * A1_plus
-                grad_green_work[js[5]] += isgn * coupling_n * A2_plus
-                # Subtract off the diverging singular n=0 component
-                grad_green_work[j] -= isgn * coupling_0 * wgauss
+                # Second type of singularity: 𝒦ⁿ (Eq. 86: 𝒦ⁿαᵢ - δⱼᵢK⁰)
+                @. @views grad_greenfunction_block[j, sing_idx] += wgauss[ig] * gradG_n * lagrange_stencil[ig]
+                grad_greenfunction_block[j, j] -= gradG_0 * wgauss[ig]
             end
         end
 
-        # Set residue based on logic similar to Table I of Chance 1997 + existing δⱼᵢ in eq. 69
-        # Would need to pass in wall geometry to generalize this to open walls
-        is_closed_toroidal = true
-        if is_closed_toroidal
-            residue = (j1 == 2.0) ? 0.0 : (j2 == 1 ? 2.0 : -2.0) # Chance eq. 89
-        else
-            # TODO: this line can be gotten rid of if we are never doing open walls
-            residue = (j1 == j2) ? 2.0 : 0.0 # Chance eq. 90
-        end
-        # Subtract regular integral component of δⱼᵢK⁰ in eq. 83 and add residue value in eq. 89/90
-        grad_green_work[j] = grad_green_work[j] - isgn * grad_green_0 + residue
-
-        # Subtract off analytic singular integral from Chance eq.(75) if plasma-plasma block
-        if plasma_plasma_block
-            greenfunction_mat[j, js[1]] -= log_correction_2 / x_obs
-            greenfunction_mat[j, js[2]] -= log_correction_1 / x_obs
-            greenfunction_mat[j, js[3]] -= log_correction_0 / x_obs
-            greenfunction_mat[j, js[4]] -= log_correction_1 / x_obs
-            greenfunction_mat[j, js[5]] -= log_correction_2 / x_obs
+        # Add analytic singular integral (first type) from Chance eq. 78
+        if populate_greenfunction && observer isa PlasmaGeometry
+            @. @views greenfunction[j, sing_idx] -= log_correction / x_obs
         end
     end
+
+    # Normals need to point outward from vacuum region. In CCW θ convention, normal points
+    # out of vacuum for plasma but inward for wall, so we multiply by -1 for wall sources
+    if source isa WallGeometry
+        grad_greenfunction_block .*= -1
+    end
+
     # Since we computed 2π𝒢, divide by 2π to get 𝒢
-    greenfunction_mat ./= 2π
+    greenfunction ./= 2π
+
+    # Add analytic singular integral (second type) from Table I of Chance 1997 + existing δⱼᵢ in eq. 69
+    # Would need to pass in wall geometry to generalize this to open walls
+    is_closed_toroidal = true
+    if is_closed_toroidal # Chance eq. 89
+        residue = (observer isa WallGeometry) ? 0.0 : (source isa PlasmaGeometry ? 2.0 : -2.0)
+    else # Chance eq. 90
+        # TODO: this line can be gotten rid of if we are never doing open walls
+        residue = (typeof(observer) == typeof(source)) ? 2.0 : 0.0
+    end
+    # Add residue value from eq. 89/90 to block diagonal
+    @inbounds for i in 1:mtheta
+        grad_greenfunction_block[i, i] += residue
+    end
 end
 
 #############################################################
@@ -257,7 +285,6 @@ function elliptic_integral_k(m1)
     return ellipk
 end
 
-
 """
     This function is different from elliptic integral E(k). Be careful.
 
@@ -285,7 +312,6 @@ function elliptic_integral_e(m1)
 
 end
 
-
 # Chance 1997 eq.(49) (original)
 function P0_minus_half(s)
     m1 = 2 / (s + 1)
diff --git a/src/Vacuum/Kernel3D.jl b/src/Vacuum/Kernel3D.jl
new file mode 100644
index 00000000..d7948c28
--- /dev/null
+++ b/src/Vacuum/Kernel3D.jl
@@ -0,0 +1,530 @@
+const INV_4PI = 1.0 / (4π)
+
+"""
+    periodic_wrap(x, n) -> Int
+
+Inline function for fast periodic wrapping for indices near the valid range [1, n].
+Equivalent to `mod1(x, n)` but avoids division for small offsets.
+Only valid when `x` is within one period of the valid range (i.e., `1-n < x < 2n`).
+"""
+@inline periodic_wrap(x::Int, n::Int) = x < 1 ? x + n : (x > n ? x - n : x)
+
+"""
+    SingularQuadratureData
+
+Precomputed data for singular correction quadrature following BIEST approach.
+Initialized once on first use.
+
+## Fields
+
+    - `qx::Vector{Float64}`: Radial quadrature points in [0,1]
+    - `qw::Vector{Float64}`: Radial quadrature weights
+    - `Gpou::Matrix{Float64}`: Partition of unity on Cartesian grid (PATCH_DIM × PATCH_DIM)
+    - `Ppou::Matrix{Float64}`: Partition of unity on polar grid (RAD_DIM × ANG_DIM)
+    - `P2G::SparseMatrixCSC{Float64,Int}`: Sparse interpolation matrix (Ngrid × Npolar) mapping polar quadrature points to Cartesian grid
+        - Forward (patch→polar): `polar = P2G' * patch`
+        - Backward (polar→grid): `grid = P2G * polar`.
+    - `PATCH_DIM::Int`: Patch dimension (odd integer)
+    - `PATCH_RAD::Int`: Patch radius (number of points adjacent to source point treated as singular)
+    - `ANG_DIM::Int`: Number of angular quadrature points
+    - `RAD_DIM::Int`: Number of radial quadrature points
+    - `INTERP_ORDER::Int`: Lagrange interpolation order
+"""
+struct SingularQuadratureData
+    qx::Vector{Float64}
+    qw::Vector{Float64}
+    Gpou::Matrix{Float64}
+    Ppou::Matrix{Float64}
+    P2G::SparseMatrixCSC{Float64,Int}
+    PATCH_DIM::Int
+    PATCH_RAD::Int
+    ANG_DIM::Int
+    RAD_DIM::Int
+    INTERP_ORDER::Int
+end
+
+"""
+    SingularQuadratureData(PATCH_RAD::Int, RAD_DIM::Int, INTERP_ORDER::Int)
+
+Constructor which initializes quadrature points, weights, partition-of-unity functions, and
+interpolation matrices for singular correction based on input parameters. Follows BIEST's approach.
+
+# Arguments
+
+  - `PATCH_RAD::Int`: Number of points adjacent to source point to treat as singular
+  - `RAD_DIM::Int`: Radial quadrature order
+  - `INTERP_ORDER::Int`: Lagrange interpolation order
+
+# Returns
+
+  - `SingularQuadratureData`: Precomputed quadrature data
+"""
+function SingularQuadratureData(PATCH_RAD::Int, RAD_DIM::Int, INTERP_ORDER::Int)
+
+    # Total size of square patch extracted around singular point (odd number: 2*PATCH_DIM0+1)
+    PATCH_DIM = 2 * PATCH_RAD + 1
+    @assert INTERP_ORDER <= PATCH_DIM "Must have INTERP_ORDER <= PATCH_DIM"
+    # Number of angular quadrature nodes in polar coordinates (uniformly distributed around circle)
+    ANG_DIM = 2 * RAD_DIM
+
+    # Setup radial quadrature
+    qx_raw, qw_raw = FastGaussQuadrature.gausslegendre(RAD_DIM) # points on [-1,1]
+    qx = (qx_raw .+ 1) ./ 2  # Map [-1, 1] to [0, 1]
+    qw = qw_raw ./ 2         # Adjust weights for interval change
+
+    # Partition of unity function, exp(-36 * r^p) where p depends on PATCH_DIM
+    pou_power = PATCH_DIM > 45 ? 10 : (PATCH_DIM > 20 ? 8 : 6)
+    pou(r) = r ≥ 1.0 ? 0.0 : exp(-36.0 * r^pou_power)
+
+    # Partition of Unity on Cartesian grid
+    Gpou = zeros(PATCH_DIM, PATCH_DIM)
+    for i in 1:PATCH_DIM, j in 1:PATCH_DIM
+        r = sqrt((i - 1 - PATCH_RAD)^2 + (j - 1 - PATCH_RAD)^2) / PATCH_RAD
+        Gpou[i, j] = -pou(r)
+    end
+
+    # Partition of Unity on polar grid including transformation Jacobian - Ppou = χ(ρ) M²/4 r dr dt, eq. 38 in Malhotra 2019
+    Ppou = zeros(RAD_DIM, ANG_DIM)
+    dθ = 2π / ANG_DIM
+    for j in 1:ANG_DIM, i in 1:RAD_DIM
+        dr = qw[i] * PATCH_RAD
+        rdθ = qx[i] * PATCH_RAD * dθ
+        Ppou[i, j] = pou(qx[i]) * dr * rdθ
+    end
+
+    # Spacing between Lagrange interpolation nodes in [0,1] for INTERP_ORDER-point stencil
+    h = 1.0 / (INTERP_ORDER - 1)
+
+    # Compute 2D tensor-product Lagrange basis function at (x0, x1) in local
+    # stencil coordinates for basis node (i0, i1) on uniform grid with spacing h
+    @inline function lagrange_interp(x0::Float64, x1::Float64, i0::Int, i1::Int)
+        Lx = Ly = 1.0
+        ξ0 = x0 / h
+        ξ1 = x1 / h
+        for j0 in 0:(INTERP_ORDER-1)
+            j0 != i0 && (Lx *= (ξ0 - j0) / (i0 - j0))
+        end
+        for j1 in 0:(INTERP_ORDER-1)
+            j1 != i1 && (Ly *= (ξ1 - j1) / (i1 - j1))
+        end
+        return Lx * Ly
+    end
+
+    # Build sparse interpolation operator P2G ∈ ℝ^{Ngrid × Npolar}
+    #   grid_values  = P2G  * polar_values
+    #   polar_values = P2G' * grid_values
+    # Each column of P2G contains the INTERP_ORDER² Lagrange weights
+    # mapping one polar sample to its surrounding Cartesian grid stencil.
+    Ngrid = PATCH_DIM * PATCH_DIM
+    Npolar = RAD_DIM * ANG_DIM
+
+    # Preallocate COO storage:
+    #   I_coo[k], J_coo[k] = (row, column) index of kth nonzero
+    #   V_coo[k]           = interpolation weight
+    nnz_per_polar = INTERP_ORDER^2
+    I_coo = Vector{Int}(undef, Npolar * nnz_per_polar)
+    J_coo = Vector{Int}(undef, Npolar * nnz_per_polar)
+    V_coo = Vector{Float64}(undef, Npolar * nnz_per_polar)
+
+    idx = 1
+    for ir in 1:RAD_DIM, ia in 1:ANG_DIM
+        # Map polar node to unit square: x0, x1 ∈ [0,1] × [0,1]
+        x0 = 0.5 + 0.5 * qx[ir] * cos(dθ * (ia - 1))
+        x1 = 0.5 + 0.5 * qx[ir] * sin(dθ * (ia - 1))
+
+        # Lower-left corner indices of INTERP_ORDER × INTERP_ORDER stencil centered on (x0,x1)
+        y0 = clamp(trunc(Int, x0 * (PATCH_DIM - 1) - (INTERP_ORDER - 1) ÷ 2), 0, PATCH_DIM - INTERP_ORDER)
+        y1 = clamp(trunc(Int, x1 * (PATCH_DIM - 1) - (INTERP_ORDER - 1) ÷ 2), 0, PATCH_DIM - INTERP_ORDER)
+
+        # Local coordinates within INTERP_ORDER×INTERP_ORDER stencil, normalized to [0,1]
+        z0 = (x0 * (PATCH_DIM - 1) - y0) * h
+        z1 = (x1 * (PATCH_DIM - 1) - y1) * h
+
+        # Polar point index (column in P2G)
+        j_polar = ir + RAD_DIM * (ia - 1)
+
+        # Populate stencil contributions for this polar node
+        for i0 in 1:INTERP_ORDER, i1 in 1:INTERP_ORDER
+            # Grid point index (row in P2G), using column-major layout
+            i_grid = (y0 + i0) + PATCH_DIM * (y1 + i1 - 1)
+            I_coo[idx] = i_grid
+            J_coo[idx] = j_polar
+            V_coo[idx] = lagrange_interp(z0, z1, i0 - 1, i1 - 1)
+            idx += 1
+        end
+    end
+
+    # Assemble sparse interpolation matrix
+    P2G = sparse(I_coo, J_coo, V_coo, Ngrid, Npolar)
+
+    return SingularQuadratureData(qx, qw, Gpou, Ppou, P2G, PATCH_DIM, PATCH_RAD, ANG_DIM, RAD_DIM, INTERP_ORDER)
+end
+
+# Global cache for quadrature data (initialized on first use)
+const SINGULAR_QUAD_CACHE = Ref{Union{Nothing,SingularQuadratureData}}(nothing)
+
+"""
+    get_singular_quadrature(PATCH_RAD::Int, RAD_DIM::Int, INTERP_ORDER::Int)
+
+Get cached singular quadrature data, initializing if necessary. Returns cached data
+if parameters match the cached initialization; reinitializes if parameters differ.
+This allows the user to change quadrature parameters between calls, but prevents
+redundant reinitialization when parameters are unchanged.
+"""
+function get_singular_quadrature(PATCH_RAD::Int, RAD_DIM::Int, INTERP_ORDER::Int)
+
+    # Check if cache exists and parameters match
+    cached = SINGULAR_QUAD_CACHE[]
+    if !isnothing(cached) &&
+       cached.PATCH_RAD == PATCH_RAD &&
+       cached.RAD_DIM == RAD_DIM &&
+       cached.INTERP_ORDER == INTERP_ORDER
+        return cached
+    end
+
+    # Reinitialize if parameters changed or cache is empty
+    SINGULAR_QUAD_CACHE[] = SingularQuadratureData(PATCH_RAD, RAD_DIM, INTERP_ORDER)
+    return SINGULAR_QUAD_CACHE[]
+end
+
+"""
+    laplace_single_layer(x_obs, x_src) -> Float64
+
+Evaluate the Laplace single-layer (FxU) kernel between two 3D points. Returns
+0.0 if the observation point coincides with the source point to avoid singularity.
+
+The single-layer kernel φ is the fundamental solution to Laplace's equation:
+
+```
+φ(x_obs, x_src) = 1 / (4π |x_obs - x_src|)
+```
+
+# Arguments
+
+  - `x_obs`: Observation point (3D Cartesian coordinates, any AbstractVector)
+  - `x_src`: Source point (3D Cartesian coordinates, any AbstractVector)
+
+# Returns
+
+  - `Float64`: Kernel value φ(x_obs, x_src)
+"""
+function laplace_single_layer(x_obs::AbstractVector{<:Real}, x_src::AbstractVector{<:Real})
+    @inbounds begin
+        dx = x_obs[1] - x_src[1]
+        dy = x_obs[2] - x_src[2]
+        dz = x_obs[3] - x_src[3]
+    end
+    r2 = dx*dx + dy*dy + dz*dz
+    r2 < 1e-30 && return 0.0
+    return INV_4PI * inv(sqrt(r2))
+end
+"""
+    laplace_double_layer(x_obs, x_src, n_src) -> Float64
+
+Evaluate the Laplace double-layer (DxU) kernel between a point and a surface element. Returns
+0.0 if the observation point coincides with the source point to avoid singularity. Allocation-free
+scalar arithmetic is used for maximum performance.
+
+The double-layer kernel K is the normal derivative of the fundamental solution:
+
+```
+K(x_obs, x_src, n_src) = ∇_{x_src} φ · n_src = 1/(4π) * (x_obs - x_src) · n_src / |x_obs - x_src|³
+```
+
+# Arguments
+
+  - `x_obs`: Observation point (3D Cartesian coordinates, any AbstractVector)
+  - `x_src`: Source point on surface (3D Cartesian coordinates, any AbstractVector)
+  - `n_src`: Outward UNIT normal at source point (must be normalized!, any AbstractVector)
+
+# Returns
+
+  - `Float64`: Kernel value K(x_obs, x_src, n_src)
+"""
+function laplace_double_layer(x_obs::AbstractVector{<:Real}, x_src::AbstractVector{<:Real}, n_src::AbstractVector{<:Real})
+    @inbounds begin
+        dx = x_obs[1] - x_src[1]
+        dy = x_obs[2] - x_src[2]
+        dz = x_obs[3] - x_src[3]
+        nx = n_src[1]
+        ny = n_src[2]
+        nz = n_src[3]
+    end
+    r2 = dx*dx + dy*dy + dz*dz
+    r2 < 1e-30 && return 0.0
+    rinv = inv(sqrt(r2))
+    r3inv = rinv * rinv * rinv
+    return (dx*nx + dy*ny + dz*nz) * (INV_4PI * r3inv)
+end
+
+"""
+    extract_patch!(patch, data, Nt, Np, t0, p0, PATCH_DIM)
+
+Extract a PATCH_DIM × PATCH_DIM patch of data centered at (t0, p0) with periodic wrapping.
+
+# Arguments
+
+  - `patch`: Preallocated output array for data around the singular point (PATCH_DIM × PATCH_DIM × dof)
+  - `data`: Source data array (can be coordinates, normals, or area elements)
+  - `Nt, Np`: Grid dimensions (toroidal, poloidal)
+  - `t0, p0`: Center indices (1-based)
+  - `PATCH_DIM`: Patch size (must be odd)
+"""
+function extract_patch!(patch::Array{Float64,3}, data::Matrix{Float64}, idx_pol_center::Int, idx_tor_center::Int, npol::Int, ntor::Int, PATCH_DIM::Int)
+
+    PATCH_RAD = (PATCH_DIM - 1) ÷ 2
+    @inbounds for j in 1:PATCH_DIM, i in 1:PATCH_DIM
+        # Enforce periodicity
+        idx_pol = periodic_wrap(idx_pol_center - PATCH_RAD + i - 1, npol)
+        idx_tor = periodic_wrap(idx_tor_center - PATCH_RAD + j - 1, ntor)
+        # Copy data to the patch using direct indexing (avoids view allocation)
+        idx_src = idx_pol + npol * (idx_tor - 1)
+        patch[i, j, 1] = data[idx_src, 1]
+        patch[i, j, 2] = data[idx_src, 2]
+        patch[i, j, 3] = data[idx_src, 3]
+    end
+end
+
+"""
+    interpolate_to_polar!(polar_data, patch, quad_data)
+
+Interpolate Cartesian patch data to polar quadrature points using sparse matrix multiply.
+Overwrites `polar_data` using mul! function arguments, mul!(C, A, B, α, β) -> C where
+C = α * A * B + β * C.
+
+# Arguments
+
+  - `polar_data`: Preallocated output array for polar data (RAD_DIM × ANG_DIM × dof)
+  - `patch`: Patch data (PATCH_DIM × PATCH_DIM × dof)
+  - `P2G`: Sparse interpolation matrix
+"""
+function interpolate_to_polar!(polar_data::Array{Float64,3}, patch::Array{Float64,3}, P2G::SparseMatrixCSC{Float64,Int})
+    # Flatten patch to (Ngrid × dof), apply P2G' to get (Npolar × dof)
+    patch_flat = reshape(patch, :, size(patch, 3))
+    mul!(reshape(polar_data, :, size(patch, 3)), P2G', patch_flat, 1.0, 0.0)
+end
+
+"""
+    compute_polar_normal!(n_polar, dr_dθ_polar, dr_dζ_polar)
+
+Compute normal vector (= ∂r/∂θ × ∂r/∂ζ) at polar quadrature points from interpolated tangent vectors.
+We already scaled the normals by normal_orient in the geometry construction, so we need to reapply
+that here since we are recomputing the normals from the derivatives.
+
+# Arguments
+
+  - `n_polar`: Preallocation unit normal vector at each polar point (RAD_DIM × ANG_DIM × 3)
+  - `dr_dθ_polar`: Interpolated ∂r/∂θ at polar points (RAD_DIM × ANG_DIM × 3)
+  - `dr_dζ_polar`: Interpolated ∂r/∂ζ at polar points (RAD_DIM × ANG_DIM × 3)
+  - `normal_orient`: Multiplier applied to normals to make them orient out of vacuum region (+1 or -1)
+"""
+function compute_polar_normal!(n_polar::Array{Float64,3}, dr_dθ::Array{Float64,3}, dr_dζ::Array{Float64,3}, normal_orient::Int)
+    # Inline cross product to avoid slice allocation
+    @inbounds for ia in axes(dr_dθ, 2), ir in axes(dr_dθ, 1)
+        n_polar[ir, ia, 1] = dr_dθ[ir, ia, 2] * dr_dζ[ir, ia, 3] - dr_dθ[ir, ia, 3] * dr_dζ[ir, ia, 2]
+        n_polar[ir, ia, 2] = dr_dθ[ir, ia, 3] * dr_dζ[ir, ia, 1] - dr_dθ[ir, ia, 1] * dr_dζ[ir, ia, 3]
+        n_polar[ir, ia, 3] = dr_dθ[ir, ia, 1] * dr_dζ[ir, ia, 2] - dr_dθ[ir, ia, 2] * dr_dζ[ir, ia, 1]
+    end
+    n_polar .*= normal_orient
+end
+
+"""
+    KernelWorkspace
+
+Thread-local workspace for `compute_3D_kernel_matrix!` to enable parallel execution.
+Each thread gets its own workspace to avoid data races on temporary arrays.
+"""
+struct KernelWorkspace
+    r_patch::Array{Float64,3}
+    dr_dθ_patch::Array{Float64,3}
+    dr_dζ_patch::Array{Float64,3}
+    r_polar::Array{Float64,3}
+    dr_dθ_polar::Array{Float64,3}
+    dr_dζ_polar::Array{Float64,3}
+    n_polar::Array{Float64,3}
+    M_polar_single::Matrix{Float64}
+    M_polar_double::Matrix{Float64}
+    M_grid_single_flat::Vector{Float64}
+    M_grid_double_flat::Vector{Float64}
+end
+
+"""
+    KernelWorkspace(PATCH_DIM, RAD_DIM, ANG_DIM)
+
+Create a new workspace with pre-allocated arrays for kernel matrix computation.
+"""
+function KernelWorkspace(PATCH_DIM::Int, RAD_DIM::Int, ANG_DIM::Int)
+    return KernelWorkspace(
+        zeros(PATCH_DIM, PATCH_DIM, 3),      # r_patch
+        zeros(PATCH_DIM, PATCH_DIM, 3),      # dr_dθ_patch
+        zeros(PATCH_DIM, PATCH_DIM, 3),      # dr_dζ_patch
+        zeros(RAD_DIM, ANG_DIM, 3),          # r_polar
+        zeros(RAD_DIM, ANG_DIM, 3),          # dr_dθ_polar
+        zeros(RAD_DIM, ANG_DIM, 3),          # dr_dζ_polar
+        zeros(RAD_DIM, ANG_DIM, 3),          # n_polar
+        zeros(RAD_DIM, ANG_DIM),             # M_polar_single
+        zeros(RAD_DIM, ANG_DIM),             # M_polar_double
+        zeros(PATCH_DIM^2),                  # M_grid_single_flat
+        zeros(PATCH_DIM^2)                   # M_grid_double_flat
+    )
+end
+
+"""
+    compute_3D_kernel_matrix!(grad_greenfunction, greenfunction, observer, source; PATCH_RAD=3, RAD_DIM=15, INTERP_ORDER=6)
+
+Compute boundary integral kernel matrices for 3D geometries with the singular correction
+algorithm from Malhotra et al. 2019. Uses multi-threading for parallel computation over
+observer points.
+
+  - Far regions: Rectangle rule with uniform weights (1/N)
+  - Singular regions: Polar quadrature with partition-of-unity blending
+
+grad_greenfunction is the double-layer kernel matrix, where each entry is
+∇_{x_src} φ(x_obs, x_src) · n_src, and greenfunction is the single-layer kernel matrix,
+where each entry is φ(x_obs, x_src).
+
+# Arguments
+
+  - `grad_greenfunction`: Double-layer kernel matrix (Nobs × Nsrc) filled in place
+
+  - `greenfunction`: Single-layer kernel matrix (Nobs × Nsrc) filled in place
+  - `observer`: Observer geometry (PlasmaGeometry3D)
+  - `source`: Source geometry (PlasmaGeometry3D)
+  - `PATCH_RAD`: Number of points adjacent to source point to treat as singular
+
+      + Total patch size in # of gridpoints = (2 * PATCH_RAD + 1) x (2 * PATCH_RAD + 1)
+  - `RAD_DIM`: Polar radial quadrature order. Angular order = 2 * RAD_DIM
+  - `INTERP_ORDER`: Lagrange interpolation order
+
+      + Must be ≤ (2 * PATCH_RAD + 1)
+
+# Threading
+
+This function automatically uses all available threads (`Threads.nthreads()`).
+Start Julia with `julia -t auto` or set `JULIA_NUM_THREADS` to enable multi-threading.
+"""
+function compute_3D_kernel_matrix!(
+    grad_greenfunction::Matrix{Float64},
+    greenfunction::Matrix{Float64},
+    observer::Union{PlasmaGeometry3D,WallGeometry3D},
+    source::Union{PlasmaGeometry3D,WallGeometry3D},
+    PATCH_RAD_POL::Int,
+    PATCH_RAD_TOR::Int,
+    RAD_DIM::Int,
+    INTERP_ORDER::Int
+)
+    num_points = observer.mtheta * observer.nzeta
+    dθdζ = 4π^2 / (num_points)
+
+    # Get block of grad green function matrix
+    col_index = (source isa PlasmaGeometry3D ? 1 : 2)
+    row_index = (observer isa PlasmaGeometry3D ? 1 : 2)
+    grad_greenfunction_block = view(
+        grad_greenfunction,
+        ((row_index-1)*num_points+1):(row_index*num_points),
+        ((col_index-1)*num_points+1):(col_index*num_points)
+    )
+
+    # Zero out green function matrix
+    fill!(greenfunction, 0.0)
+    # 𝒢ⁿ only needed for plasma as source term (RHS of eqs. 26/27 in Chance 1997)
+    populate_greenfunction = source isa PlasmaGeometry3D
+
+    # Initialize quadrature data
+    quad_data = get_singular_quadrature(PATCH_RAD_POL, RAD_DIM, INTERP_ORDER)
+    (; PATCH_DIM, PATCH_RAD, ANG_DIM, RAD_DIM, Ppou, Gpou, P2G) = quad_data
+    @assert observer.mtheta ≥ PATCH_DIM
+    @assert observer.nzeta ≥ PATCH_DIM
+
+    # Allocate thread-local workspaces (one per thread)
+    nthreads = Threads.nthreads()
+    workspaces = [KernelWorkspace(PATCH_DIM, RAD_DIM, ANG_DIM) for _ in 1:nthreads]
+
+    # Parallel loop through observer points
+    Threads.@threads for idx_obs in 1:num_points
+        # Get thread-local workspace
+        ws = workspaces[Threads.threadid()]
+        (; r_patch, dr_dθ_patch, dr_dζ_patch, r_polar, dr_dθ_polar, dr_dζ_polar,
+            n_polar, M_polar_single, M_polar_double, M_grid_single_flat, M_grid_double_flat) = ws
+
+        # Convert linear index to 2D indices
+        i_obs = mod1(idx_obs, observer.mtheta)
+        j_obs = (idx_obs - 1) ÷ observer.mtheta + 1
+        r_obs = @view observer.r[idx_obs, :]
+
+        # ============================================================
+        # FAR FIELD: Trapezoidal rule for nonsingular source points
+        # Note: kernels return zero for r_src = r_obs
+        # ============================================================
+        @inbounds for idx_src in 1:num_points
+            # Evaluate kernels at grid points
+            r_src = @view source.r[idx_src, :]
+            n_src = @view source.normal[idx_src, :]
+            K_single = laplace_single_layer(r_obs, r_src)
+            K_double = laplace_double_layer(r_obs, r_src, n_src)
+
+            # Apply weights (periodic trapezoidal rule = constant weights)
+            if populate_greenfunction
+                greenfunction[idx_obs, idx_src] = K_single * dθdζ
+            end
+            grad_greenfunction_block[idx_obs, idx_src] = K_double * dθdζ
+        end
+
+        # ============================================================
+        # NEAR FIELD: Polar quadrature with singular correction
+        # ============================================================
+        # Extract patches of source data around the singular point (size = PATCH_DIM x PATCH_DIM x dof)
+        extract_patch!(r_patch, source.r, i_obs, j_obs, source.mtheta, source.nzeta, PATCH_DIM)
+        extract_patch!(dr_dθ_patch, source.dr_dθ, i_obs, j_obs, source.mtheta, source.nzeta, PATCH_DIM)
+        extract_patch!(dr_dζ_patch, source.dr_dζ, i_obs, j_obs, source.mtheta, source.nzeta, PATCH_DIM)
+
+        # Interpolate coordinates and tangent vectors to polar quadrature points
+        interpolate_to_polar!(r_polar, r_patch, P2G)
+        interpolate_to_polar!(dr_dθ_polar, dr_dθ_patch, P2G)
+        interpolate_to_polar!(dr_dζ_polar, dr_dζ_patch, P2G)
+
+        # Compute normal vectors at polar points from interpolated tangent vectors
+        compute_polar_normal!(n_polar, dr_dθ_polar, dr_dζ_polar, source.normal_orient)
+
+        # Evaluate kernels at polar points with POU weighting
+        @inbounds for ia in 1:ANG_DIM, ir in 1:RAD_DIM
+            # Evaluate kernels using recomputed normal (use @view to avoid allocation)
+            r_src = @view r_polar[ir, ia, :]
+            n_src = @view n_polar[ir, ia, :]
+            K_single = laplace_single_layer(r_obs, r_src)
+            K_double = laplace_double_layer(r_obs, r_src, n_src)
+
+            # Apply quadrature weights: area element × POU, where POU contains rdrdθ already
+            M_polar_single[ir, ia] = K_single * Ppou[ir, ia] * dθdζ
+            M_polar_double[ir, ia] = K_double * Ppou[ir, ia] * dθdζ
+        end
+
+        # Distribute polar singular corrections back to Cartesian grid using sparse matrix
+        # grid = P2G * polar (maps Npolar → Ngrid)
+        mul!(M_grid_single_flat, P2G, vec(M_polar_single))
+        mul!(M_grid_double_flat, P2G, vec(M_polar_double))
+        M_grid_single = reshape(M_grid_single_flat, PATCH_DIM, PATCH_DIM)
+        M_grid_double = reshape(M_grid_double_flat, PATCH_DIM, PATCH_DIM)
+
+        # Compute remaining far-field POU contribution and near-field polar quadrature result
+        # We include this region in the far-field trapezoidal rule, so use Gpou = -χ here to get 1-χ
+        @inbounds for j in 1:PATCH_DIM, i in 1:PATCH_DIM
+            # Map back to global indices
+            idx_pol = periodic_wrap(i_obs - PATCH_RAD + i - 1, source.mtheta)
+            idx_tor = periodic_wrap(j_obs - PATCH_RAD + j - 1, source.nzeta)
+            idx_src = idx_pol + source.mtheta * (idx_tor - 1)
+
+            # Remainder of far-field contribution on the singular grid: Gpou = -χ
+            r_src = @view source.r[idx_src, :]
+            n_src = @view source.normal[idx_src, :]
+            far_single = laplace_single_layer(r_obs, r_src) * Gpou[i, j] * dθdζ
+            far_double = laplace_double_layer(r_obs, r_src, n_src) * Gpou[i, j] * dθdζ
+
+            # Apply near + far contributions
+            if populate_greenfunction
+                greenfunction[idx_obs, idx_src] += M_grid_single[i, j] + far_single
+            end
+            grad_greenfunction_block[idx_obs, idx_src] += M_grid_double[i, j] + far_double
+        end
+    end
+end
diff --git a/src/Vacuum/MathUtils.jl b/src/Vacuum/MathUtils.jl
index d8ec0b88..ccfc31a6 100644
--- a/src/Vacuum/MathUtils.jl
+++ b/src/Vacuum/MathUtils.jl
@@ -5,58 +5,47 @@ Perform the inverse Fourier transform of `gil` onto `gll` using Fourier coeffici
 
 # Arguments
 
-  - `gll`: Output matrix (mpert × mpert) updated in-place
-  - `gil`: Input matrix (mtheta × mpert) containing Fourier-space data
-  - `cs`: Fourier coefficient matrix (mtheta × mpert)
+  - `gll`: Output matrix (num_pert × num_pert) updated in-place
+  - `gil`: Input matrix (num_points × num_pert) containing Fourier-space data
+  - `cs`: Fourier coefficient matrix (num_points × num_pert)
   - `m00`: Integer offset in the gil matrix (row offset)
   - `l00`: Integer offset in the gil matrix (column offset)
+  - `weight`: Quadrature weight factor
 
 # Notes
 
-  - Computes: `gll[l2, l1] = (2π * dth) * Σ_i cs[i, l2] * gil[i, l1]`
+  - Computes: `gll[l2, l1] = weight * Σ_i cs[i, l2] * gil[i, l1]`
   - Performs the same function as fouranv in the Fortran code.
 
 # Returns
 
   - gll(l2,l1) : output matrix updated in-place (mpert × mpert)
 """
-function fourier_inverse_transform!(gll::Matrix{Float64}, gil::Matrix{Float64}, cs::Matrix{Float64}, m00::Int, l00::Int)
-
-    # Zero out gll block
-    mtheta, mpert = size(cs)
-    fill!(view(gll, 1:mpert, 1:mpert), 0.0)
-
+function fourier_inverse_transform!(gll::Matrix{Float64}, gil::Matrix{Float64}, cs::Matrix{Float64}, m00::Int, l00::Int, weight::Float64)
     # Inverse Fourier transform via matrix multiply: gll = cs^T * gil * (2π * dth)
-    # This computes: gll[l2, l1] = (2π * dth) * Σ_i cs[i, l2] * gil[i, l1]
-    dth = 2π / mtheta
-    mul!(gll, cs', view(gil, (m00+1):(m00+mtheta), (l00+1):(l00+mpert)), 2π * dth, 0.0)
+    num_points, num_pert = size(cs)
+    mul!(gll, cs', view(gil, (m00+1):(m00+num_points), (l00+1):(l00+num_pert)), weight, 0.0)
 end
 
 """
-    fourier_transform!(gil, gij, cs, m00, l00, mth, mpert)
+    fourier_transform!(gil, gij, cs, m00, l00)
 
     Purpose:
       This routine performs a truncated Fourier transform of gij onto gil
       using Fourier coefficients stored in cs.
 
     Inputs:
-      gij(i,j)   : input matrix of size (mth × mth), the "physical-space" data
-      cs(j,l)    : Fourier coefficient matrix (mth × mpert)
+      gij(i,j)   : input matrix of size (num_points × num_points), the "physical-space" data
+      cs(j,l)    : Fourier coefficient matrix (num_points × num_pert)
       m00, l00   : integer offsets in the gil matrix
-      mth        : number of θ-grid points (dimension of gij along i, j)
-      mpert      : number of Fourier modes
 
     Output:
-      gil(i', l') : output matrix updated in-place (mth × mpert), where i' = m00 + i and l' = l00 + l
+      gil(i', l') : output matrix updated in-place (num_points × num_pert), where i' = m00 + i and l' = l00 + l
 """
 function fourier_transform!(gil::Matrix{Float64}, gij::Matrix{Float64}, cs::Matrix{Float64}, m00::Int, l00::Int)
-
-    # Zero out relevant gil block
-    mtheta, mpert = size(cs)
-    fill!(view(gil, (m00+1):(m00+mtheta), (l00+1):(l00+mpert)), 0.0)
-
     # Fourier transform via matrix multiply: gil[i, l] = Σ_j gij[i, j] * cs[j, l]
-    mul!(view(gil, (m00+1):(m00+mtheta), (l00+1):(l00+mpert)), gij, cs)
+    num_points, num_pert = size(cs)
+    mul!(view(gil, (m00+1):(m00+num_points), (l00+1):(l00+num_pert)), gij, cs)
 end
 
 # Returns the array of derivatives at all x points, I think this acts like difspl
diff --git a/src/Vacuum/Vacuum.jl b/src/Vacuum/Vacuum.jl
index f7e43b08..d5a08224 100644
--- a/src/Vacuum/Vacuum.jl
+++ b/src/Vacuum/Vacuum.jl
@@ -1,14 +1,17 @@
 module Vacuum
 
 using TOML, Interpolations, SpecialFunctions, LinearAlgebra, Printf
+using StaticArrays
+using FastGaussQuadrature
+using SparseArrays
 
-export mscvac, set_dcon_params, VacuumInput, compute_vacuum_response
+export mscvac, set_dcon_params, VacuumInput, compute_vacuum_response, compute_vacuum_response_3D
 export compute_vacuum_field
-export kernel!
 export WallShapeSettings
 
 include("DataTypes.jl")
 include("Kernel2D.jl")
+include("Kernel3D.jl")
 include("MathUtils.jl")
 
 # ======================================================================
@@ -205,7 +208,7 @@ end
 Compute the vacuum response matrix using provided vacuum inputs.
 
 This is the pure Julia implementation that replaces the Fortran `mscvac` function.
-It returns the relevant arrays: `wv`, `grri`, and `xzpts`.
+It returns the relevant arrays: `wv`, `green_fourier`, `plasma_coords`, and `wall_coords`.
 
 # Arguments
 
@@ -216,89 +219,184 @@ It returns the relevant arrays: `wv`, `grri`, and `xzpts`.
 # Returns
 
   - `wv`: Complex vacuum response matrix (mpert × mpert) relating plasma perturbations to vacuum response
-  - `grri`: Green's function response matrix (2*mtheta × 2*mpert) in Fourier space
-  - `xzpts`: Coordinate array (mtheta × 4) containing [R_plasma, Z_plasma, R_wall, Z_wall]
-
-# Notes
-
-  - This function initializes the plasma surface and wall geometry internally
-  - The vacuum response includes plasma-plasma and plasma-wall coupling effects
-  - For n=0 modes with closed walls, a regularization factor is added to prevent singularities
+  - `green_fourier`: Green's function response matrix (2 * mtheta × 2 * mpert) in Fourier space
+  - `plasma_coords`: Cartesian coordinate array (mtheta × 3) containing [R_plasma, 0, Z_plasma]
+  - `wall_coords`: Cartesian coordinate array (mtheta × 3) containing [R_wall, 0, Z_wall]
 """
 function compute_vacuum_response(inputs::VacuumInput, wall_settings::WallShapeSettings)
 
     # Initialization and allocations
     (; mtheta, mpert, n, kernelsign, force_wv_symmetry) = inputs
-    plasma_surf = initialize_plasma_surface(inputs)
-    wall = initialize_wall(inputs, plasma_surf, wall_settings)
-    grri = zeros(2 * mtheta, 2 * mpert)
-    grad_greenfunction_mat = zeros(2 * mtheta, 2 * mtheta)
-    greenfunction_temp = zeros(mtheta, mtheta)
+    plasma_surf = PlasmaGeometry(inputs)
+    wall = WallGeometry(inputs, plasma_surf, wall_settings)
+
+    # Allocate matrices upfront with correct size based on wall presence
+    grad_green_size = wall.nowall ? mtheta : 2 * mtheta
+    grad_green = zeros(grad_green_size, grad_green_size)
+    green_temp = zeros(mtheta, mtheta)
+
+    # 𝒢ₗ(θⱼ) from Chance eq. 106-108. first mtheta rows are plasma as observer, second are wall
+    # First mpert columns are real (cosine), second mpert are imaginary (sine)
+    green_fourier_rows = wall.nowall ? mtheta : 2 * mtheta
+    green_fourier = zeros(green_fourier_rows, 2 * mpert)
+    PLASMA_ROW_OFFSET = 0
+    WALL_ROW_OFFSET = mtheta
+    COS_COL_OFFSET = 0
+    SIN_COL_OFFSET = mpert
 
     # Plasma–Plasma block
-    j1, j2 = 1, 1
-    kernel!(grad_greenfunction_mat, greenfunction_temp, plasma_surf.x, plasma_surf.z, plasma_surf.x, plasma_surf.z, j1, j2, n)
+    kernel!(grad_green, green_temp, plasma_surf, plasma_surf, n)
 
-    # Fourier transform plasma-plasma block
-    fourier_transform!(grri, greenfunction_temp, plasma_surf.cos_ln_basis, 0, 0)
-    fourier_transform!(grri, greenfunction_temp, plasma_surf.sin_ln_basis, 0, mpert)
+    # Fourier transform obs=plasma, src=plasma block into green_fourier
+    fourier_transform!(green_fourier, green_temp, plasma_surf.cos_mn_basis, PLASMA_ROW_OFFSET, COS_COL_OFFSET)
+    fourier_transform!(green_fourier, green_temp, plasma_surf.sin_mn_basis, PLASMA_ROW_OFFSET, SIN_COL_OFFSET)
 
-    !wall.nowall && begin
+    if !wall.nowall
         # Plasma–Wall block
-        j1, j2 = 1, 2
-        kernel!(grad_greenfunction_mat, greenfunction_temp, plasma_surf.x, plasma_surf.z, wall.x, wall.z, j1, j2, n)
-
+        kernel!(grad_green, green_temp, plasma_surf, wall, n)
         # Wall–Wall block
-        j1, j2 = 2, 2
-        kernel!(grad_greenfunction_mat, greenfunction_temp, wall.x, wall.z, wall.x, wall.z, j1, j2, n)
-
+        kernel!(grad_green, green_temp, wall, wall, n)
         # Wall–Plasma block
-        j1, j2 = 2, 1
-        kernel!(grad_greenfunction_mat, greenfunction_temp, wall.x, wall.z, plasma_surf.x, plasma_surf.z, j1, j2, n)
+        kernel!(grad_green, green_temp, wall, plasma_surf, n)
 
-        # Fourier transform wall blocks into grri
-        fourier_transform!(grri, greenfunction_temp, plasma_surf.cos_ln_basis, mtheta, 0)
-        fourier_transform!(grri, greenfunction_temp, plasma_surf.sin_ln_basis, mtheta, mpert)
+        # Fourier transform obs=wall, src=plasma block into green_fourier
+        fourier_transform!(green_fourier, green_temp, plasma_surf.cos_mn_basis, WALL_ROW_OFFSET, COS_COL_OFFSET)
+        fourier_transform!(green_fourier, green_temp, plasma_surf.sin_mn_basis, WALL_ROW_OFFSET, SIN_COL_OFFSET)
     end
 
     # Add cn0 to make grdgre nonsingular for n=0 modes
     cn0 = 1.0 # expose to user if anyone ever actually tries to use this
-    (abs(n) <= 1e-5 && !wall.nowall && wall.is_closed_toroidal) && begin
+    (n == 0 && !wall.nowall && wall.is_closed_toroidal) && begin
         @warn "Adding $cn0 to diagonal of grdgre to regularize n=0 mode; this may affect accuracy of results."
         mth12 = wall.nowall ? mtheta : 2 * mtheta
         for i in 1:mth12, j in 1:mth12
-            grad_greenfunction_mat[i, j] += cn0
+            grad_green[i, j] += cn0
         end
     end
 
     # Only needed for mutual inductance with the wall calculations
-    (kernelsign < 0) && begin
-        grad_greenfunction_mat .*= kernelsign
+    if kernelsign < 0
+        grad_green .*= kernelsign
         # Account for factor of 2 in diagonal terms in eq. 90 of Chance
-        for i in 1:(2*mtheta)
-            grad_greenfunction_mat[i, i] += 2.0
-        end
+        grad_green .+= 2I
     end
 
-    # Invert the vacuum response system of equations, eqs. 92-94ish of Chance 1997 (gelimb in Fortran)
-    # If plasma only, lower blocks will be empty
-    if wall.nowall
-        @views grri[1:mtheta, :] .= grad_greenfunction_mat[1:mtheta, 1:mtheta] \ grri[1:mtheta, :]
-    else
-        grri .= grad_greenfunction_mat \ grri
-    end
+    # Invert the vacuum response system of equations, eqs. 112 of Chance 1997 (gelimb in Fortran)
+    green_fourier .= grad_green \ green_fourier
 
     # There's some logic that computes xpass/zpass and chiwc/chiws here, might eventually be needed?
 
     # Perform inverse Fourier transforms to get response matrix components (eq. 115-118 of Chance 2007)
-    arr = zeros(mpert, mpert)
-    aii = zeros(mpert, mpert)
-    ari = zeros(mpert, mpert)
-    air = zeros(mpert, mpert)
-    fourier_inverse_transform!(arr, grri, plasma_surf.cos_ln_basis, 0, 0)
-    fourier_inverse_transform!(aii, grri, plasma_surf.sin_ln_basis, 0, mpert)
-    fourier_inverse_transform!(ari, grri, plasma_surf.sin_ln_basis, 0, 0)
-    fourier_inverse_transform!(air, grri, plasma_surf.cos_ln_basis, 0, mpert)
+    dθ = 2π / mtheta
+    arr, aii, ari, air = ntuple(_ -> zeros(mpert, mpert), 4)
+    fourier_inverse_transform!(arr, green_fourier, plasma_surf.cos_mn_basis, PLASMA_ROW_OFFSET, COS_COL_OFFSET, 2π * dθ)
+    fourier_inverse_transform!(aii, green_fourier, plasma_surf.sin_mn_basis, PLASMA_ROW_OFFSET, SIN_COL_OFFSET, 2π * dθ)
+    fourier_inverse_transform!(ari, green_fourier, plasma_surf.sin_mn_basis, PLASMA_ROW_OFFSET, COS_COL_OFFSET, 2π * dθ)
+    fourier_inverse_transform!(air, green_fourier, plasma_surf.cos_mn_basis, PLASMA_ROW_OFFSET, SIN_COL_OFFSET, 2π * dθ)
+
+    # Final form of vacuum response matrix (eq. 114 of Chance 2007)
+    wv = complex.(arr .+ aii, air .- ari)
+
+    # Force symmetry of response matrix if desired
+    force_wv_symmetry && hermitianpart!(wv)
+
+    # Create plasma_coords and wall_coords arrays
+    plasma_coords = zeros(inputs.mtheta, 3)
+    wall_coords = zeros(inputs.mtheta, 3)
+    @views plasma_coords[:, 1] .= plasma_surf.x
+    @views plasma_coords[:, 2] .= 0.0
+    @views plasma_coords[:, 3] .= plasma_surf.z
+    @views wall_coords[:, 1] .= wall.x
+    @views wall_coords[:, 2] .= 0.0
+    @views wall_coords[:, 3] .= wall.z
+    return wv, green_fourier, plasma_coords, wall_coords
+end
+
+"""
+    compute_vacuum_response_3D(inputs::VacuumInput3D, wall_settings::WallShapeSettings)
+
+Compute the vacuum response matrix via the 3D approach using provided vacuum inputs.
+It returns the relevant arrays: `wv`, `green_fourier`, `plasma_coords`, and `wall_coords`.
+
+# Arguments
+
+  - `inputs::VacuumInput3D`: Struct containing vacuum calculation parameters including mode numbers,
+    grid resolution, toroidal mode numbers, and plasma boundary information.
+  - `wall_settings::WallShapeSettings`: Struct specifying the wall geometry configuration.
+
+# Returns
+
+  - `wv`: Complex vacuum response matrix (mpert * npert × mpert * npert) relating plasma perturbations to vacuum response
+  - `green_fourier`: Green's function response matrix (2 * mtheta * nzeta × 2 * mpert * npert) in Fourier space
+  - `plasma_coords`: Cartesian coordinate array (mtheta * nzeta × 3) of the plasma surface
+  - `wall_coords`: Cartesian coordinate array (mtheta * nzeta × 3) of the wall
+"""
+function compute_vacuum_response_3D(inputs::VacuumInput3D, wall_settings::WallShapeSettings; PATCH_RAD::Int=11, RAD_DIM::Int=20, INTERP_ORDER::Int=5)
+
+    # Initialization and allocations
+    (; mtheta, mpert, force_wv_symmetry, kernelsign, nzeta, npert) = inputs
+    num_points = nzeta * mtheta
+    num_modes = npert * mpert
+
+    # TODO: Currently only supports axisymmetric surfaces
+    plasma_surf = PlasmaGeometry3D(inputs)
+    wall = WallGeometry3D(inputs, plasma_surf, wall_settings)
+    patch_rad_pol = PATCH_RAD;
+    patch_rad_tor = PATCH_RAD #round(Int, PATCH_RAD * plasma_surf.aspect_ratio)
+
+    # Allocate based on wall presence
+    grad_green_size = wall.nowall ? num_points : 2 * num_points
+    grad_green = zeros(grad_green_size, grad_green_size)
+    green_temp = zeros(num_points, num_points)
+
+    # 𝒢ₗ(θⱼ) from Chance eq. 106-108. first num_points rows are plasma as observer, second are wall
+    # First num_modes columns are real (cosine), second num_modes are imaginary (sine)
+    green_fourier_rows = wall.nowall ? num_points : 2 * num_points
+    green_fourier = zeros(green_fourier_rows, 2 * num_modes)
+    PLASMA_ROW_OFFSET = 0
+    WALL_ROW_OFFSET = num_points
+    COS_COL_OFFSET = 0
+    SIN_COL_OFFSET = num_modes
+
+    # Plasma–Plasma block
+    compute_3D_kernel_matrix!(grad_green, green_temp, plasma_surf, plasma_surf, patch_rad_pol, patch_rad_tor, RAD_DIM, INTERP_ORDER)
+
+    # Fourier transform obs=plasma, src=plasma block into green_fourier
+    fourier_transform!(green_fourier, green_temp, plasma_surf.cos_mn_basis3D, PLASMA_ROW_OFFSET, COS_COL_OFFSET)
+    fourier_transform!(green_fourier, green_temp, plasma_surf.sin_mn_basis3D, PLASMA_ROW_OFFSET, SIN_COL_OFFSET)
+
+    if !wall.nowall
+        # Plasma–Wall block
+        compute_3D_kernel_matrix!(grad_green, green_temp, plasma_surf, wall, patch_rad_pol, patch_rad_tor, RAD_DIM, INTERP_ORDER)
+        # Wall–Wall block
+        compute_3D_kernel_matrix!(grad_green, green_temp, wall, wall, patch_rad_pol, patch_rad_tor, RAD_DIM, INTERP_ORDER)
+        # Wall–Plasma block
+        compute_3D_kernel_matrix!(grad_green, green_temp, wall, plasma_surf, patch_rad_pol, patch_rad_tor, RAD_DIM, INTERP_ORDER)
+
+        # Fourier transform obs=wall, src=plasma block into green_fourier
+        fourier_transform!(green_fourier, green_temp, plasma_surf.cos_mn_basis3D, WALL_ROW_OFFSET, COS_COL_OFFSET)
+        fourier_transform!(green_fourier, green_temp, plasma_surf.sin_mn_basis3D, WALL_ROW_OFFSET, SIN_COL_OFFSET)
+    end
+
+    grad_green += 0.5I
+
+    # Only needed for mutual inductance with the wall calculations
+    if kernelsign < 0
+        grad_green .*= kernelsign
+        # Account for factor of 2 in diagonal terms in eq. 90 of Chance
+        grad_green .+= 2I
+    end
+
+    # Invert the vacuum response system of equations, eqs. 112 of Chance 1997 (gelimb in Fortran)
+    green_fourier .= grad_green \ green_fourier
+
+    # Perform inverse Fourier transforms to get response matrix components (eq. 115-118 of Chance 2007)
+    dθdζ = 4π^2 / (num_points)
+    arr, aii, ari, air = ntuple(_ -> zeros(num_modes, num_modes), 4)
+    fourier_inverse_transform!(arr, green_fourier, plasma_surf.cos_mn_basis3D, PLASMA_ROW_OFFSET, COS_COL_OFFSET, dθdζ)
+    fourier_inverse_transform!(aii, green_fourier, plasma_surf.sin_mn_basis3D, PLASMA_ROW_OFFSET, SIN_COL_OFFSET, dθdζ)
+    fourier_inverse_transform!(ari, green_fourier, plasma_surf.sin_mn_basis3D, PLASMA_ROW_OFFSET, COS_COL_OFFSET, dθdζ)
+    fourier_inverse_transform!(air, green_fourier, plasma_surf.cos_mn_basis3D, PLASMA_ROW_OFFSET, SIN_COL_OFFSET, dθdζ)
 
     # Final form of vacuum response matrix (eq. 114 of Chance 2007)
     wv = complex.(arr .+ aii, air .- ari)
@@ -306,13 +404,7 @@ function compute_vacuum_response(inputs::VacuumInput, wall_settings::WallShapeSe
     # Force symmetry of response matrix if desired
     force_wv_symmetry && hermitianpart!(wv)
 
-    # Create xzpts array
-    xzpts = zeros(Float64, inputs.mtheta, 4)
-    @views xzpts[:, 1] .= plasma_surf.x
-    @views xzpts[:, 2] .= plasma_surf.z
-    @views xzpts[:, 3] .= wall.x
-    @views xzpts[:, 4] .= wall.z
-    return wv, grri, xzpts
+    return wv, green_fourier, plasma_surf.r, wall.r
 end
 
 """