diff --git a/juliac/.gitignore b/juliac/.gitignore new file mode 100644 index 0000000..49aa449 --- /dev/null +++ b/juliac/.gitignore @@ -0,0 +1,7 @@ +Manifest.toml +recfact_server +recfact_debug +*.so +*.o +build/ +test_* diff --git a/juliac/Project.toml b/juliac/Project.toml new file mode 100644 index 0000000..58cfa09 --- /dev/null +++ b/juliac/Project.toml @@ -0,0 +1,3 @@ +[deps] +RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/juliac/shim.jl b/juliac/shim.jl new file mode 100644 index 0000000..0eaad63 --- /dev/null +++ b/juliac/shim.jl @@ -0,0 +1,72 @@ +module RecursiveFactorizationShim + +import RecursiveFactorization +using LinearAlgebra: BlasInt +import Base.@ccallable + +# ============================================================================ +# @ccallable entry points for RecursiveFactorization.lu! +# +# Interface mirrors LAPACK dgetrf/sgetrf: +# A: pointer to column-major matrix data (modified in-place to L\U) +# m: number of rows +# n: number of columns +# ipiv: pointer to pivot index array (length min(m,n)), output +# returns: info (0 = success, k > 0 = U(k,k) is exactly zero) +# ============================================================================ + +# --- Float64, with pivoting, no threading --- +@ccallable function recursive_lu_f64!(A::Ptr{Float64}, m::Int64, n::Int64, + ipiv::Ptr{Int64})::Int64 + mat = unsafe_wrap(Matrix{Float64}, A, (m, n)) + ipiv_vec = unsafe_wrap(Vector{Int64}, ipiv, min(m, n)) + F = RecursiveFactorization.lu!(mat, ipiv_vec, Val(true), Val(false); check = false) + return Int64(F.info) +end + +# --- Float32, with pivoting, no threading --- +@ccallable function recursive_lu_f32!(A::Ptr{Float32}, m::Int64, n::Int64, + ipiv::Ptr{Int64})::Int64 + mat = unsafe_wrap(Matrix{Float32}, A, (m, n)) + ipiv_vec = unsafe_wrap(Vector{Int64}, ipiv, min(m, n)) + F = RecursiveFactorization.lu!(mat, ipiv_vec, Val(true), Val(false); check = false) + return Int64(F.info) +end + +# --- Float64, with pivoting, threaded --- +@ccallable function recursive_lu_f64_threaded!(A::Ptr{Float64}, m::Int64, n::Int64, + ipiv::Ptr{Int64})::Int64 + mat = unsafe_wrap(Matrix{Float64}, A, (m, n)) + ipiv_vec = unsafe_wrap(Vector{Int64}, ipiv, min(m, n)) + F = RecursiveFactorization.lu!(mat, ipiv_vec, Val(true), Val(true); check = false) + return Int64(F.info) +end + +# --- Float32, with pivoting, threaded --- +@ccallable function recursive_lu_f32_threaded!(A::Ptr{Float32}, m::Int64, n::Int64, + ipiv::Ptr{Int64})::Int64 + mat = unsafe_wrap(Matrix{Float32}, A, (m, n)) + ipiv_vec = unsafe_wrap(Vector{Int64}, ipiv, min(m, n)) + F = RecursiveFactorization.lu!(mat, ipiv_vec, Val(true), Val(true); check = false) + return Int64(F.info) +end + +# --- Float64, no pivoting, no threading --- +@ccallable function recursive_lu_f64_nopiv!(A::Ptr{Float64}, m::Int64, n::Int64, + ipiv::Ptr{Int64})::Int64 + mat = unsafe_wrap(Matrix{Float64}, A, (m, n)) + ipiv_vec = unsafe_wrap(Vector{Int64}, ipiv, min(m, n)) + F = RecursiveFactorization.lu!(mat, ipiv_vec, Val(false), Val(false); check = false) + return Int64(F.info) +end + +# --- Float32, no pivoting, no threading --- +@ccallable function recursive_lu_f32_nopiv!(A::Ptr{Float32}, m::Int64, n::Int64, + ipiv::Ptr{Int64})::Int64 + mat = unsafe_wrap(Matrix{Float32}, A, (m, n)) + ipiv_vec = unsafe_wrap(Vector{Int64}, ipiv, min(m, n)) + F = RecursiveFactorization.lu!(mat, ipiv_vec, Val(false), Val(false); check = false) + return Int64(F.info) +end + +end # module diff --git a/juliac/shim_exe.jl b/juliac/shim_exe.jl new file mode 100644 index 0000000..b9f5d53 --- /dev/null +++ b/juliac/shim_exe.jl @@ -0,0 +1,125 @@ +# Signal to RecursiveFactorization.__init__ that we ARE the server binary, +# so it should not try to start a subprocess server (which would recurse). +ENV["RECFACT_SERVER"] = "1" + +import RecursiveFactorization +using LinearAlgebra: BlasInt + +# Low-level I/O using libc read/write on file descriptors +# This avoids needing Core.stdin which isn't available in trimmed binaries + +# Set a file descriptor to blocking mode. +# The juliac runtime's libuv may set fds to non-blocking, which breaks raw read/write. +function set_blocking!(fd::Cint)::Nothing + flags = ccall(:fcntl, Cint, (Cint, Cint), fd, 3) # F_GETFL = 3 + flags == -1 && error("fcntl F_GETFL failed") + # Clear O_NONBLOCK (0x800 on Linux) + new_flags = flags & ~Cint(0x800) + ret = ccall(:fcntl, Cint, (Cint, Cint, Cint), fd, 4, new_flags) # F_SETFL = 4 + ret == -1 && error("fcntl F_SETFL failed") + nothing +end + +function fd_read!(fd::Cint, buf::Ptr{UInt8}, nbytes::Int)::Nothing + remaining = nbytes + offset = 0 + while remaining > 0 + n = ccall(:read, Cssize_t, (Cint, Ptr{UInt8}, Csize_t), fd, buf + offset, remaining) + n <= 0 && error("read failed") + remaining -= n + offset += n + end + nothing +end + +function fd_write(fd::Cint, buf::Ptr{UInt8}, nbytes::Int)::Nothing + remaining = nbytes + offset = 0 + while remaining > 0 + n = ccall(:write, Cssize_t, (Cint, Ptr{UInt8}, Csize_t), fd, buf + offset, remaining) + n <= 0 && error("write failed") + remaining -= n + offset += n + end + nothing +end + +function read_value(fd::Cint, ::Type{T}) where {T} + buf = Ref{T}() + GC.@preserve buf fd_read!(fd, Ptr{UInt8}(pointer_from_objref(buf)), sizeof(T)) + return buf[] +end + +function write_value(fd::Cint, x::T) where {T} + buf = Ref{T}(x) + GC.@preserve buf fd_write(fd, Ptr{UInt8}(pointer_from_objref(buf)), sizeof(T)) + nothing +end + +function read_matrix!(fd::Cint, A::AbstractArray) + GC.@preserve A fd_read!(fd, Ptr{UInt8}(pointer(A)), sizeof(eltype(A)) * length(A)) + nothing +end + +function write_array(fd::Cint, A::AbstractArray) + GC.@preserve A fd_write(fd, Ptr{UInt8}(pointer(A)), sizeof(eltype(A)) * length(A)) + nothing +end + +function process_f64(fdin::Cint, fdout::Cint, m::Int64, n::Int64, mn::Int64, + pivot::Val, thread::Val) + A = Matrix{Float64}(undef, m, n) + read_matrix!(fdin, A) + ipiv = Vector{Int64}(undef, mn) + F = RecursiveFactorization.lu!(A, ipiv, pivot, thread; check = false) + write_value(fdout, Int64(F.info)) + write_array(fdout, A) + write_array(fdout, ipiv) + return nothing +end + +function process_f32(fdin::Cint, fdout::Cint, m::Int64, n::Int64, mn::Int64, + pivot::Val, thread::Val) + A = Matrix{Float32}(undef, m, n) + read_matrix!(fdin, A) + ipiv = Vector{Int64}(undef, mn) + F = RecursiveFactorization.lu!(A, ipiv, pivot, thread; check = false) + write_value(fdout, Int64(F.info)) + write_array(fdout, A) + write_array(fdout, ipiv) + return nothing +end + +function (@main)(args::Vector{String}) + fdin = Cint(0) # stdin fd + fdout = Cint(1) # stdout fd + + # The juliac runtime (libuv) may set fds to non-blocking mode. + # Reset to blocking for our raw read/write calls. + set_blocking!(fdin) + set_blocking!(fdout) + + while true + cmd = read_value(fdin, UInt8) + cmd == 0xff && break + + m = read_value(fdin, Int64) + n = read_value(fdin, Int64) + mn = min(m, n) + + if cmd == 0x00 + process_f64(fdin, fdout, m, n, mn, Val(true), Val(false)) + elseif cmd == 0x01 + process_f32(fdin, fdout, m, n, mn, Val(true), Val(false)) + elseif cmd == 0x02 + process_f64(fdin, fdout, m, n, mn, Val(true), Val(true)) + elseif cmd == 0x03 + process_f32(fdin, fdout, m, n, mn, Val(true), Val(true)) + elseif cmd == 0x04 + process_f64(fdin, fdout, m, n, mn, Val(false), Val(false)) + elseif cmd == 0x05 + process_f32(fdin, fdout, m, n, mn, Val(false), Val(false)) + end + end + return 0 +end diff --git a/src/RecursiveFactorization.jl b/src/RecursiveFactorization.jl index fb6784a..3a9304e 100644 --- a/src/RecursiveFactorization.jl +++ b/src/RecursiveFactorization.jl @@ -5,6 +5,7 @@ if isdefined(Base, :Experimental) && end include("./lu.jl") include("./butterflylu.jl") +include("./juliac_server.jl") import PrecompileTools @@ -12,4 +13,9 @@ PrecompileTools.@compile_workload begin lu!(rand(2, 2)) end +function __init__() + _init_juliac_server() + atexit(_finalize_juliac_server) +end + end # module diff --git a/src/juliac_server.jl b/src/juliac_server.jl new file mode 100644 index 0000000..53ed5f0 --- /dev/null +++ b/src/juliac_server.jl @@ -0,0 +1,301 @@ +# Juliac binary server management for RecursiveFactorization +# Provides lu!/lu via a juliac-compiled subprocess communicating over pipes. +# +# Protocol (binary, little-endian): +# Request: cmd(UInt8) + m(Int64) + n(Int64) + A(T[m*n]) +# Response: info(Int64) + A(T[m*n]) + ipiv(Int64[min(m,n)]) +# Commands: 0x00=f64, 0x01=f32, 0x02=f64_threaded, 0x03=f32_threaded, +# 0x04=f64_nopiv, 0x05=f32_nopiv, 0xff=exit + +using LinearAlgebra: BlasInt, LU, checknonsingular + +# Server process state +const _server_lock = ReentrantLock() +const _server_proc = Ref{Union{Nothing, Base.Process}}(nothing) +const _server_binary = Ref{String}("") + +function _juliac_binary_path() + # Check for binary in package juliac/ directory + pkg_dir = dirname(dirname(@__FILE__)) + return joinpath(pkg_dir, "juliac", "recfact_server") +end + +function _scratch_binary_path() + # Use a version-specific scratch directory + scratch_dir = joinpath(first(DEPOT_PATH), "scratchspaces", + "f2c3362d-daeb-58d1-803e-2bc74f2840b4", # RecursiveFactorization UUID + "juliac_v$(VERSION.major)_$(VERSION.minor)") + return joinpath(scratch_dir, "recfact_server") +end + +function _find_binary() + # Check package directory first + p = _juliac_binary_path() + isfile(p) && return p + # Then check scratch space + p = _scratch_binary_path() + isfile(p) && return p + return nothing +end + +""" + RecursiveFactorization.build_binary(; force=false, trim=:unsafe_warn, bundle=false) + +Build the juliac binary for RecursiveFactorization using JuliaC.jl. Requires Julia 1.12+. + +# Keyword Arguments +- `force::Bool=false`: Rebuild even if binary already exists. +- `trim::Symbol=:unsafe_warn`: Trimming mode (`:safe`, `:unsafe_warn`, or `:no`). +- `bundle::Bool=false`: If true, bundle libjulia and dependencies for a relocatable binary. +""" +function build_binary(; force::Bool = false, trim::Symbol = :unsafe_warn, bundle::Bool = false) + if VERSION < v"1.12" + error("Building juliac binary requires Julia 1.12+, currently running $VERSION") + end + + binary_path = _scratch_binary_path() + if !force && isfile(binary_path) + @info "Juliac binary already exists at $binary_path. Use force=true to rebuild." + return binary_path + end + + pkg_dir = dirname(dirname(@__FILE__)) + shim_src = joinpath(pkg_dir, "juliac", "shim_exe.jl") + if !isfile(shim_src) + error("Shim source not found at $shim_src") + end + + # Prepare output directory + out_dir = dirname(binary_path) + mkpath(out_dir) + + # Prepare project directory for juliac build + build_dir = joinpath(out_dir, "build") + mkpath(build_dir) + + # Create Project.toml for build + build_project = joinpath(build_dir, "Project.toml") + open(build_project, "w") do f + write(f, """ + [deps] + RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" + LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + """) + end + + # Instantiate the build project with local RecursiveFactorization + @info "Setting up build environment..." + run(```$(Base.julia_cmd()) --project=$build_dir -e " + import Pkg + Pkg.develop(path=$(repr(pkg_dir))) + Pkg.instantiate() + "```) + + trim_mode = if trim === :safe + "safe" + elseif trim === :unsafe_warn + "unsafe-warn" + elseif trim === :no + nothing + else + error("Unknown trim mode: $trim. Use :safe, :unsafe_warn, or :no.") + end + + @info "Building juliac binary via JuliaC.jl (this may take a few minutes)..." + + # Use JuliaC.jl for the compile → link → bundle pipeline + @eval import JuliaC + img = JuliaC.ImageRecipe(; + output_type = "--output-exe", + trim_mode = trim_mode, + file = shim_src, + project = build_dir, + ) + JuliaC.compile_products(img) + + link = JuliaC.LinkRecipe(; + image_recipe = img, + outname = binary_path, + ) + JuliaC.link_products(link) + + if bundle + bundle_dir = joinpath(out_dir, "bundle") + bun = JuliaC.BundleRecipe(; + link_recipe = link, + output_dir = bundle_dir, + ) + JuliaC.bundle_products(bun) + @info "Bundled binary at $bundle_dir" + end + + if !isfile(binary_path) + error("Build failed: binary not found at $binary_path") + end + + @info "Juliac binary built successfully at $binary_path" + return binary_path +end + +function _start_server() + lock(_server_lock) do + proc = _server_proc[] + if proc !== nothing && process_running(proc) + return + end + + binary = _find_binary() + if binary === nothing + return + end + + _server_binary[] = binary + _server_proc[] = open(`$binary`, write = true, read = true) + # Give the server a moment to initialize + sleep(0.1) + if !process_running(_server_proc[]) + _server_proc[] = nothing + @warn "Juliac server failed to start" + end + end +end + +function _stop_server() + lock(_server_lock) do + proc = _server_proc[] + if proc === nothing + return + end + if process_running(proc) + try + write(proc, UInt8(0xff)) + flush(proc) + catch + end + close(proc.in) + wait(proc) + end + _server_proc[] = nothing + end +end + +function _server_available() + proc = _server_proc[] + return proc !== nothing && process_running(proc) +end + +function _server_lu!(A::Matrix{T}, ipiv::Vector{Int64}, + pivot::Val{Pivot}, thread::Val{Thread}; + check::Union{Bool, Val{true}, Val{false}} = Val(true)) where {T, Pivot, Thread} + proc = _server_proc[] + m, n = size(A) + mn = min(m, n) + + # Determine command byte + cmd = if T === Float64 + if !Pivot + UInt8(0x04) + elseif Thread + UInt8(0x02) + else + UInt8(0x00) + end + else # Float32 + if !Pivot + UInt8(0x05) + elseif Thread + UInt8(0x03) + else + UInt8(0x01) + end + end + + lock(_server_lock) do + # Send request + write(proc, cmd) + write(proc, Int64(m)) + write(proc, Int64(n)) + write(proc, A) + flush(proc) + + # Read response + info_bytes = Vector{UInt8}(undef, 8) + readbytes!(proc, info_bytes, 8) + info = reinterpret(Int64, info_bytes)[1] + + A_bytes = Vector{UInt8}(undef, m * n * sizeof(T)) + readbytes!(proc, A_bytes, length(A_bytes)) + A_result = reshape(reinterpret(T, A_bytes), m, n) + copyto!(A, A_result) + + ipiv_bytes = Vector{UInt8}(undef, mn * sizeof(Int64)) + readbytes!(proc, ipiv_bytes, length(ipiv_bytes)) + ipiv_result = reinterpret(Int64, copy(ipiv_bytes)) + copyto!(ipiv, ipiv_result) + + binfo = BlasInt(info) + ((check isa Bool && check) || (check === Val(true))) && checknonsingular(binfo) + return LU(A, BlasInt.(ipiv), binfo) + end +end + +""" + RecursiveFactorization.juliac_lu!(A, [pivot, thread]; check=true) + +LU factorization using the juliac-compiled binary server. +Only supports `Matrix{Float64}` and `Matrix{Float32}`. + +Falls back to pure-Julia `lu!` if the server is not available. +""" +function juliac_lu!(A::Matrix{T}, pivot = Val(true), thread = Val(false); + check::Union{Bool, Val{true}, Val{false}} = Val(true)) where {T <: Union{Float64, Float32}} + if !_server_available() + return lu!(A, normalize_pivot(pivot), thread; check = check) + end + m, n = size(A) + mn = min(m, n) + ipiv = Vector{Int64}(undef, mn) + npivot = normalize_pivot(pivot) + return _server_lu!(A, ipiv, npivot, thread; check = check) +end + +function juliac_lu!(A::Matrix{T}, ipiv::Vector{Int64}, + pivot = Val(true), thread = Val(false); + check::Union{Bool, Val{true}, Val{false}} = Val(true)) where {T <: Union{Float64, Float32}} + if !_server_available() + return lu!(A, ipiv, normalize_pivot(pivot), thread; check = check) + end + npivot = normalize_pivot(pivot) + return _server_lu!(A, ipiv, npivot, thread; check = check) +end + +""" + RecursiveFactorization.juliac_lu(A, [pivot, thread]; check=true) + +LU factorization using the juliac-compiled binary server, on a copy of A. +""" +function juliac_lu(A::AbstractMatrix, pivot = Val(true), thread = Val(false); kwargs...) + return juliac_lu!(copy(A), pivot, thread; kwargs...) +end + +function _init_juliac_server() + # Skip server init inside juliac-compiled binaries (we ARE the server) + if ccall(:jl_generating_output, Cint, ()) != 0 + return + end + # Also skip if the RECFACT_SERVER env var is set (used by the server binary) + if haskey(ENV, "RECFACT_SERVER") + return + end + if _find_binary() !== nothing + try + _start_server() + catch + # Server startup may fail in trimmed binaries or other edge cases + end + end +end + +function _finalize_juliac_server() + _stop_server() +end diff --git a/test/runtests.jl b/test/runtests.jl index 2454246..4a9ce9a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -87,3 +87,65 @@ end end end +@testset "Juliac server" begin + if RecursiveFactorization._server_available() + @info "Juliac server is available, running binary path tests" + + # Test Float64 square + for s in (10, 50, 100, 300) + A = rand(s, s) + E = 20s * eps(Float64) + MF = RecursiveFactorization.juliac_lu(A) + BF = baselu(A) + @test MF.info == BF.info + @test norm(MF.L * MF.U - A[MF.p, :], Inf) < E + end + + # Test Float32 square + for s in (10, 50, 100) + A = rand(Float32, s, s) + E = 20s * eps(Float32) + MF = RecursiveFactorization.juliac_lu(A) + BF = baselu(A) + @test MF.info == BF.info + @test norm(Float64.(MF.L * MF.U) - Float64.(A[MF.p, :]), Inf) < E + end + + # Test rectangular (tall and wide) + for (m, n) in ((200, 100), (100, 200)) + A = rand(m, n) + E = 20m * eps(Float64) + MF = RecursiveFactorization.juliac_lu(A) + BF = baselu(A) + @test MF.info == BF.info + @test norm(MF.L * MF.U - A[MF.p, :], Inf) < E + end + + # Test juliac_lu! (mutating) + A = rand(100, 100) + A_orig = copy(A) + MF = RecursiveFactorization.juliac_lu!(A) + @test MF.info == 0 + @test norm(MF.L * MF.U - A_orig[MF.p, :], Inf) < 20 * 100 * eps(Float64) + + # Test singular matrix + A = rand(50, 50) + A[:, 25] .= 0 + MF = RecursiveFactorization.juliac_lu(A, check = false) + BF = baselu(A, check = false) + @test MF.info == BF.info + + # Test consistency with pure-Julia path + Random.seed!(42) + A1 = rand(100, 100) + A2 = copy(A1) + F_juliac = RecursiveFactorization.juliac_lu!(A1) + F_julia = RecursiveFactorization.lu!(A2) + @test F_juliac.info == F_julia.info + @test F_juliac.factors ≈ F_julia.factors + @test F_juliac.ipiv == F_julia.ipiv + else + @info "Juliac server not available, skipping binary path tests" + end +end +