From 1d8dd82bdcf448001f32cafed88cf8179a77b8d0 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 1 Jun 2026 20:22:24 +0200 Subject: [PATCH 1/3] Make the debug level a CompilerConfig field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a debug_level field (default the session's -g) so it is part of the compile cache key; emit_exception! and llvm_debug_info read it instead of Base.JLOptions().debug_level. Add kernel_debug_level() — the julia.gpu.debug_level intrinsic, lowered to the per-job constant by lower_debug_level! in irgen — so device code can branch on the level as a cache-keyed compile-time constant rather than the -g global. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/interface.jl | 25 +++++++++++----- src/irgen.jl | 77 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 11 deletions(-) diff --git a/src/interface.jl b/src/interface.jl index dd82630b..57341837 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -100,6 +100,9 @@ Several keyword arguments can be used to customize the compilation process: - `always_inline` specifies if the Julia front-end should inline all functions into one if possible. - `opt_level`: the optimization level to use (default: 2) +- `debug_level`: the amount of debug information to emit and the verbosity of device-side + exception reporting (default: the running session's `-g` level). Part of the cache key, + so kernels compiled at different levels don't alias. - `libraries`: link the GPU runtime and `libdevice` libraries (default: true) - `optimize`: optimize the code (default: true) - `cleanup`: run cleanup passes on the code (default: true) @@ -115,6 +118,7 @@ struct CompilerConfig{T,P} entry_abi::Symbol always_inline::Bool opt_level::Int + debug_level::Int libraries::Bool optimize::Bool cleanup::Bool @@ -127,15 +131,17 @@ struct CompilerConfig{T,P} function CompilerConfig(target::AbstractCompilerTarget, params::AbstractCompilerParams; kernel=true, name=nothing, entry_abi=:specfunc, toplevel=true, - always_inline=false, opt_level=2, optimize=toplevel, + always_inline=false, opt_level=2, + debug_level=Base.JLOptions().debug_level, optimize=toplevel, libraries=toplevel, cleanup=toplevel, validate=toplevel, strip=false, only_entry=false) if entry_abi ∉ (:specfunc, :func) error("Unknown entry_abi=$entry_abi") end new{typeof(target), typeof(params)}(target, params, kernel, name, entry_abi, - always_inline, opt_level, libraries, optimize, - cleanup, validate, strip, toplevel, only_entry) + always_inline, opt_level, debug_level, libraries, + optimize, cleanup, validate, strip, toplevel, + only_entry) end end @@ -143,7 +149,8 @@ end function CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.params, kernel=cfg.kernel, name=cfg.name, entry_abi=cfg.entry_abi, always_inline=cfg.always_inline, opt_level=cfg.opt_level, - libraries=cfg.libraries, optimize=cfg.optimize, cleanup=cfg.cleanup, + debug_level=cfg.debug_level, libraries=cfg.libraries, + optimize=cfg.optimize, cleanup=cfg.cleanup, validate=cfg.validate, strip=cfg.strip, toplevel=cfg.toplevel, only_entry=cfg.only_entry) # deriving a non-toplevel job disables certain features @@ -156,7 +163,8 @@ function CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.param validate = false end CompilerConfig(target, params; kernel, entry_abi, name, always_inline, opt_level, - libraries, optimize, cleanup, validate, strip, toplevel, only_entry) + debug_level, libraries, optimize, cleanup, validate, strip, toplevel, + only_entry) end function Base.show(io::IO, @nospecialize(cfg::CompilerConfig{T})) where {T} @@ -172,6 +180,7 @@ function Base.hash(cfg::CompilerConfig, h::UInt) h = hash(cfg.entry_abi, h) h = hash(cfg.always_inline, h) h = hash(cfg.opt_level, h) + h = hash(cfg.debug_level, h) h = hash(cfg.libraries, h) h = hash(cfg.optimize, h) h = hash(cfg.cleanup, h) @@ -354,11 +363,11 @@ end # how much debuginfo to emit function llvm_debug_info(@nospecialize(job::CompilerJob)) - if Base.JLOptions().debug_level == 0 + if job.config.debug_level == 0 LLVM.API.LLVMDebugEmissionKindNoDebug - elseif Base.JLOptions().debug_level == 1 + elseif job.config.debug_level == 1 LLVM.API.LLVMDebugEmissionKindLineTablesOnly - elseif Base.JLOptions().debug_level >= 2 + elseif job.config.debug_level >= 2 LLVM.API.LLVMDebugEmissionKindFullDebug end end diff --git a/src/irgen.jl b/src/irgen.jl index ec74ce6a..8a1f8bcb 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -146,6 +146,11 @@ function irgen(@nospecialize(job::CompilerJob)) global current_job current_job = job can_throw(job) || lower_throw!(mod) + + # resolve the `julia.gpu.debug_level` intrinsic (see `kernel_debug_level_value`) to + # the job's configured level, so device code can branch on it as a compile-time + # constant that is part of the cache key (unlike reading the `-g` global directly). + lower_debug_level!(job, mod) end return mod, compiled, gv_to_value @@ -251,9 +256,9 @@ function emit_exception!(builder, name, inst) mod = LLVM.parent(fun) # report the exception - if Base.JLOptions().debug_level >= 1 + if job.config.debug_level >= 1 name = globalstring_ptr!(builder, name, "exception") - if Base.JLOptions().debug_level == 1 + if job.config.debug_level == 1 call!(builder, Runtime.get(:report_exception), [name]) else call!(builder, Runtime.get(:report_exception_name), [name]) @@ -261,7 +266,7 @@ function emit_exception!(builder, name, inst) end # report each frame - if Base.JLOptions().debug_level >= 2 + if job.config.debug_level >= 2 rt = Runtime.get(:report_exception_frame) ft = convert(LLVM.FunctionType, rt) bt = backtrace(inst) @@ -1101,6 +1106,72 @@ function kernel_state_value(state) end end + +## debug level + +# device code can query the job's configured debug level as a compile-time constant via +# `kernel_debug_level()`, which emits the `julia.gpu.debug_level` intrinsic; `lower_debug_level!` +# (run from `irgen`, with `current_job` in scope) replaces it with the constant. this keeps +# the level part of the cache key (it lives in `CompilerConfig`), unlike reading the `-g` +# global at parse time (which would bake the wrong level under pkgimage reuse across `-g`). + +function debug_level_intr(mod::LLVM.Module) + intr = if haskey(functions(mod), "julia.gpu.debug_level") + functions(mod)["julia.gpu.debug_level"] + else + LLVM.Function(mod, "julia.gpu.debug_level", LLVM.FunctionType(LLVM.Int32Type())) + end + push!(function_attributes(intr), EnumAttribute("readnone", 0)) + + return intr +end + +# run-time equivalent: emits a call to the debug-level intrinsic, returning the job's +# configured `debug_level` as an `Int32` (lowered to a constant by `lower_debug_level!`). +function kernel_debug_level_value() + @dispose ctx=Context() begin + T_int32 = LLVM.Int32Type() + + # create function + llvm_f, _ = create_function(T_int32) + mod = LLVM.parent(llvm_f) + + # get intrinsic + intr = debug_level_intr(mod) + intr_ft = function_type(intr) + + # generate IR + @dispose builder=IRBuilder() begin + entry = BasicBlock(llvm_f, "entry") + position!(builder, entry) + + val = call!(builder, intr_ft, intr, Value[], "debug_level") + + ret!(builder, val) + end + + call_function(llvm_f, Int32) + end +end + +# replace every `julia.gpu.debug_level` call with the job's configured level +function lower_debug_level!(@nospecialize(job::CompilerJob), mod::LLVM.Module) + haskey(functions(mod), "julia.gpu.debug_level") || return false + + intr = functions(mod)["julia.gpu.debug_level"] + level = ConstantInt(LLVM.Int32Type(), job.config.debug_level) + for use in collect(uses(intr)) + inst = user(use) + @assert inst isa LLVM.CallInst + replace_uses!(inst, level) + erase!(inst) + end + @assert isempty(uses(intr)) + erase!(intr) + + return true +end + # convert kernel state argument from pass-by-value to pass-by-reference # # the kernel state argument is always passed by value to avoid codegen issues with byval. From 977c4f61a819b4d83f817f9af8b85a60fef68728 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 1 Jun 2026 23:18:12 +0200 Subject: [PATCH 2/3] Define and export kernel_debug_level It takes no back-end-specific argument (unlike kernel_state, which bakes in each back-end's KernelState type), so define the @generated accessor here and export it for back-end device runtimes rather than having each define a proxy. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/irgen.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/irgen.jl b/src/irgen.jl index 8a1f8bcb..efeb835b 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -1154,6 +1154,13 @@ function kernel_debug_level_value() end end +# device-facing accessor: the compiling job's debug level as an `Int32` compile-time constant. +# exported for back-end device runtimes (e.g. to gate exception reporting); it takes no +# back-end-specific argument, so unlike `kernel_state` there's no need for a per-back-end +# definition. not intended for user code. +@inline @generated kernel_debug_level() = kernel_debug_level_value() +export kernel_debug_level + # replace every `julia.gpu.debug_level` call with the job's configured level function lower_debug_level!(@nospecialize(job::CompilerJob), mod::LLVM.Module) haskey(functions(mod), "julia.gpu.debug_level") || return false From f5ece924def5902a75d7a1da3a2ac25c11e92a3c Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 2 Jun 2026 07:52:08 +0200 Subject: [PATCH 3/3] Clarify docs. --- src/interface.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/interface.jl b/src/interface.jl index 57341837..eecf1bbc 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -101,8 +101,7 @@ Several keyword arguments can be used to customize the compilation process: possible. - `opt_level`: the optimization level to use (default: 2) - `debug_level`: the amount of debug information to emit and the verbosity of device-side - exception reporting (default: the running session's `-g` level). Part of the cache key, - so kernels compiled at different levels don't alias. + exception reporting (0, 1 or 2; default: the running session's `-g` level). - `libraries`: link the GPU runtime and `libdevice` libraries (default: true) - `optimize`: optimize the code (default: true) - `cleanup`: run cleanup passes on the code (default: true)