diff --git a/src/interface.jl b/src/interface.jl
index dd82630b..eecf1bbc 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -100,6 +100,8 @@ Several keyword arguments can be used to customize the compilation process:
 - `always_inline` specifies if the Julia front-end should inline all functions into one
   if possible.
 - `opt_level`: the optimization level to use (default: 2)
+- `debug_level`: the amount of debug information to emit and the verbosity of device-side
+  exception reporting (0, 1 or 2; default: the running session's `-g` level).
 - `libraries`: link the GPU runtime and `libdevice` libraries (default: true)
 - `optimize`: optimize the code (default: true)
 - `cleanup`: run cleanup passes on the code (default: true)
@@ -115,6 +117,7 @@ struct CompilerConfig{T,P}
     entry_abi::Symbol
     always_inline::Bool
     opt_level::Int
+    debug_level::Int
     libraries::Bool
     optimize::Bool
     cleanup::Bool
@@ -127,15 +130,17 @@ struct CompilerConfig{T,P}
 
     function CompilerConfig(target::AbstractCompilerTarget, params::AbstractCompilerParams;
                             kernel=true, name=nothing, entry_abi=:specfunc, toplevel=true,
-                            always_inline=false, opt_level=2, optimize=toplevel,
+                            always_inline=false, opt_level=2,
+                            debug_level=Base.JLOptions().debug_level, optimize=toplevel,
                             libraries=toplevel, cleanup=toplevel, validate=toplevel,
                             strip=false, only_entry=false)
         if entry_abi ∉ (:specfunc, :func)
             error("Unknown entry_abi=$entry_abi")
         end
         new{typeof(target), typeof(params)}(target, params, kernel, name, entry_abi,
-                                            always_inline, opt_level, libraries, optimize,
-                                            cleanup, validate, strip, toplevel, only_entry)
+                                            always_inline, opt_level, debug_level, libraries,
+                                            optimize, cleanup, validate, strip, toplevel,
+                                            only_entry)
     end
 end
 
@@ -143,7 +148,8 @@ end
 function CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.params,
                         kernel=cfg.kernel, name=cfg.name, entry_abi=cfg.entry_abi,
                         always_inline=cfg.always_inline, opt_level=cfg.opt_level,
-                        libraries=cfg.libraries, optimize=cfg.optimize, cleanup=cfg.cleanup,
+                        debug_level=cfg.debug_level, libraries=cfg.libraries,
+                        optimize=cfg.optimize, cleanup=cfg.cleanup,
                         validate=cfg.validate, strip=cfg.strip, toplevel=cfg.toplevel,
                         only_entry=cfg.only_entry)
     # deriving a non-toplevel job disables certain features
@@ -156,7 +162,8 @@ function CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.param
         validate = false
     end
     CompilerConfig(target, params; kernel, entry_abi, name, always_inline, opt_level,
-                   libraries, optimize, cleanup, validate, strip, toplevel, only_entry)
+                   debug_level, libraries, optimize, cleanup, validate, strip, toplevel,
+                   only_entry)
 end
 
 function Base.show(io::IO, @nospecialize(cfg::CompilerConfig{T})) where {T}
@@ -172,6 +179,7 @@ function Base.hash(cfg::CompilerConfig, h::UInt)
     h = hash(cfg.entry_abi, h)
     h = hash(cfg.always_inline, h)
     h = hash(cfg.opt_level, h)
+    h = hash(cfg.debug_level, h)
     h = hash(cfg.libraries, h)
     h = hash(cfg.optimize, h)
     h = hash(cfg.cleanup, h)
@@ -354,11 +362,14 @@ end
 
-# how much debuginfo to emit
+# how much debuginfo to emit: maps the job's configured `debug_level` to an LLVM debug
+# emission kind. levels below 1 emit nothing and levels above 2 emit full debug info, the
+# same thresholds `emit_exception!` uses, so every integer level maps to a kind.
 function llvm_debug_info(@nospecialize(job::CompilerJob))
-    if Base.JLOptions().debug_level == 0
+    level = job.config.debug_level
+    if level <= 0
         LLVM.API.LLVMDebugEmissionKindNoDebug
-    elseif Base.JLOptions().debug_level == 1
+    elseif level == 1
         LLVM.API.LLVMDebugEmissionKindLineTablesOnly
-    elseif Base.JLOptions().debug_level >= 2
+    else
         LLVM.API.LLVMDebugEmissionKindFullDebug
     end
 end
diff --git a/src/irgen.jl b/src/irgen.jl
index ec74ce6a..efeb835b 100644
--- a/src/irgen.jl
+++ b/src/irgen.jl
@@ -146,6 +146,11 @@ function irgen(@nospecialize(job::CompilerJob))
         global current_job
         current_job = job
         can_throw(job) || lower_throw!(mod)
+
+        # resolve the `julia.gpu.debug_level` intrinsic (see `kernel_debug_level_value`) to
+        # the job's configured level, so device code can branch on it as a compile-time
+        # constant that is part of the cache key (unlike reading the `-g` global directly).
+        lower_debug_level!(job, mod)
     end
 
     return mod, compiled, gv_to_value
@@ -251,9 +256,9 @@ function emit_exception!(builder, name, inst)
     mod = LLVM.parent(fun)
 
     # report the exception
-    if Base.JLOptions().debug_level >= 1
+    if job.config.debug_level >= 1
         name = globalstring_ptr!(builder, name, "exception")
-        if Base.JLOptions().debug_level == 1
+        if job.config.debug_level == 1
             call!(builder, Runtime.get(:report_exception), [name])
         else
             call!(builder, Runtime.get(:report_exception_name), [name])
@@ -261,7 +266,7 @@ function emit_exception!(builder, name, inst)
     end
 
     # report each frame
-    if Base.JLOptions().debug_level >= 2
+    if job.config.debug_level >= 2
         rt = Runtime.get(:report_exception_frame)
         ft = convert(LLVM.FunctionType, rt)
         bt = backtrace(inst)
@@ -1101,6 +1106,89 @@ function kernel_state_value(state)
     end
 end
 
+
+## debug level
+
+# the job's configured debug level is exposed to device code as a compile-time constant:
+# `kernel_debug_level()` emits a call to the `julia.gpu.debug_level` intrinsic, which
+# `lower_debug_level!` (run from `irgen`) replaces with that constant. the level lives in
+# `CompilerConfig`, so it is part of the cache key, unlike a read of the `-g` global at
+# parse time (which would bake in the wrong level when a pkgimage is reused across `-g`).
+
+# fetch the `julia.gpu.debug_level` declaration from `mod`, creating it (no arguments,
+# `Int32` result) when the module does not have one yet, and tag it `readnone`.
+function debug_level_intr(mod::LLVM.Module)
+    intr_name = "julia.gpu.debug_level"
+    fns = functions(mod)
+    intr = if haskey(fns, intr_name)
+        fns[intr_name]
+    else
+        LLVM.Function(mod, intr_name, LLVM.FunctionType(LLVM.Int32Type()))
+    end
+
+    # the attribute is pushed on every lookup, also for a pre-existing declaration
+    push!(function_attributes(intr), EnumAttribute("readnone", 0))
+    return intr
+end
+
+# build, inside a `@dispose`d `Context`, a function whose body just calls the debug-level
+# intrinsic and returns the result, then pass it to `call_function` with `Int32` as the
+# return type. `lower_debug_level!` later turns that call into the job's `debug_level`.
+function kernel_debug_level_value()
+    @dispose ctx=Context() begin
+        ret_typ = LLVM.Int32Type()
+
+        # wrapper function, and the module it was created in
+        wrapper, _ = create_function(ret_typ)
+        wrapper_mod = LLVM.parent(wrapper)
+
+        # intrinsic declaration in that module
+        getter = debug_level_intr(wrapper_mod)
+        getter_ft = function_type(getter)
+
+        # body: call the intrinsic, return what it yields
+        @dispose builder=IRBuilder() begin
+            position!(builder, BasicBlock(wrapper, "entry"))
+            level = call!(builder, getter_ft, getter, Value[], "debug_level")
+            ret!(builder, level)
+        end
+
+        call_function(wrapper, Int32)
+    end
+end
+
+# device-facing accessor yielding the compiling job's debug level as an `Int32`
+# compile-time constant. exported so back-end device runtimes can use it (e.g. to gate
+# exception reporting); since it takes no back-end-specific argument, back-ends do not
+# need their own definition the way they do for `kernel_state`. not meant for user code.
+@inline @generated function kernel_debug_level()
+    return kernel_debug_level_value()
+end
+export kernel_debug_level
+
+# substitute the job's configured level for every call to `julia.gpu.debug_level` and drop
+# the declaration. returns `false` when the module has no such function, `true` otherwise.
+function lower_debug_level!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
+    fns = functions(mod)
+    if !haskey(fns, "julia.gpu.debug_level")
+        return false
+    end
+    getter = fns["julia.gpu.debug_level"]
+    constant = ConstantInt(LLVM.Int32Type(), job.config.debug_level)
+
+    # iterate over a snapshot of the uses, as each iteration erases the using call
+    for getter_use in collect(uses(getter))
+        call = user(getter_use)
+        @assert call isa LLVM.CallInst
+        replace_uses!(call, constant)
+        erase!(call)
+    end
+    @assert isempty(uses(getter))
+    erase!(getter)
+
+    return true
+end
+
 # convert kernel state argument from pass-by-value to pass-by-reference
 #
 # the kernel state argument is always passed by value to avoid codegen issues with byval.