Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ Several keyword arguments can be used to customize the compilation process:
- `always_inline` specifies if the Julia front-end should inline all functions into one if
possible.
- `opt_level`: the optimization level to use (default: 2)
- `debug_level`: the amount of debug information to emit and the verbosity of device-side
exception reporting (0, 1 or 2; default: the running session's `-g` level).
- `libraries`: link the GPU runtime and `libdevice` libraries (default: true)
- `optimize`: optimize the code (default: true)
- `cleanup`: run cleanup passes on the code (default: true)
Expand All @@ -115,6 +117,7 @@ struct CompilerConfig{T,P}
entry_abi::Symbol
always_inline::Bool
opt_level::Int
debug_level::Int
libraries::Bool
optimize::Bool
cleanup::Bool
Expand All @@ -127,23 +130,26 @@ struct CompilerConfig{T,P}

function CompilerConfig(target::AbstractCompilerTarget, params::AbstractCompilerParams;
kernel=true, name=nothing, entry_abi=:specfunc, toplevel=true,
always_inline=false, opt_level=2, optimize=toplevel,
always_inline=false, opt_level=2,
debug_level=Base.JLOptions().debug_level, optimize=toplevel,
libraries=toplevel, cleanup=toplevel, validate=toplevel,
strip=false, only_entry=false)
if entry_abi ∉ (:specfunc, :func)
error("Unknown entry_abi=$entry_abi")
end
new{typeof(target), typeof(params)}(target, params, kernel, name, entry_abi,
always_inline, opt_level, libraries, optimize,
cleanup, validate, strip, toplevel, only_entry)
always_inline, opt_level, debug_level, libraries,
optimize, cleanup, validate, strip, toplevel,
only_entry)
end
end

# copy constructor
function CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.params,
kernel=cfg.kernel, name=cfg.name, entry_abi=cfg.entry_abi,
always_inline=cfg.always_inline, opt_level=cfg.opt_level,
libraries=cfg.libraries, optimize=cfg.optimize, cleanup=cfg.cleanup,
debug_level=cfg.debug_level, libraries=cfg.libraries,
optimize=cfg.optimize, cleanup=cfg.cleanup,
validate=cfg.validate, strip=cfg.strip, toplevel=cfg.toplevel,
only_entry=cfg.only_entry)
# deriving a non-toplevel job disables certain features
Expand All @@ -156,7 +162,8 @@ function CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.param
validate = false
end
CompilerConfig(target, params; kernel, entry_abi, name, always_inline, opt_level,
libraries, optimize, cleanup, validate, strip, toplevel, only_entry)
debug_level, libraries, optimize, cleanup, validate, strip, toplevel,
only_entry)
end

function Base.show(io::IO, @nospecialize(cfg::CompilerConfig{T})) where {T}
Expand All @@ -172,6 +179,7 @@ function Base.hash(cfg::CompilerConfig, h::UInt)
h = hash(cfg.entry_abi, h)
h = hash(cfg.always_inline, h)
h = hash(cfg.opt_level, h)
h = hash(cfg.debug_level, h)
h = hash(cfg.libraries, h)
h = hash(cfg.optimize, h)
h = hash(cfg.cleanup, h)
Expand Down Expand Up @@ -354,11 +362,11 @@ end

# how much debuginfo to emit
function llvm_debug_info(@nospecialize(job::CompilerJob))
if Base.JLOptions().debug_level == 0
if job.config.debug_level == 0
LLVM.API.LLVMDebugEmissionKindNoDebug
elseif Base.JLOptions().debug_level == 1
elseif job.config.debug_level == 1
LLVM.API.LLVMDebugEmissionKindLineTablesOnly
elseif Base.JLOptions().debug_level >= 2
elseif job.config.debug_level >= 2
LLVM.API.LLVMDebugEmissionKindFullDebug
end
end
Expand Down
84 changes: 81 additions & 3 deletions src/irgen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ function irgen(@nospecialize(job::CompilerJob))
global current_job
current_job = job
can_throw(job) || lower_throw!(mod)

# resolve the `julia.gpu.debug_level` intrinsic (see `kernel_debug_level_value`) to
# the job's configured level, so device code can branch on it as a compile-time
# constant that is part of the cache key (unlike reading the `-g` global directly).
lower_debug_level!(job, mod)
end

return mod, compiled, gv_to_value
Expand Down Expand Up @@ -251,17 +256,17 @@ function emit_exception!(builder, name, inst)
mod = LLVM.parent(fun)

# report the exception
if Base.JLOptions().debug_level >= 1
if job.config.debug_level >= 1
name = globalstring_ptr!(builder, name, "exception")
if Base.JLOptions().debug_level == 1
if job.config.debug_level == 1
call!(builder, Runtime.get(:report_exception), [name])
else
call!(builder, Runtime.get(:report_exception_name), [name])
end
end

# report each frame
if Base.JLOptions().debug_level >= 2
if job.config.debug_level >= 2
rt = Runtime.get(:report_exception_frame)
ft = convert(LLVM.FunctionType, rt)
bt = backtrace(inst)
Expand Down Expand Up @@ -1101,6 +1106,79 @@ function kernel_state_value(state)
end
end


## debug level

# device code can query the job's configured debug level as a compile-time constant via
# `kernel_debug_level()`, which emits the `julia.gpu.debug_level` intrinsic; `lower_debug_level!`
# (run from `irgen`, with `current_job` in scope) replaces it with the constant. this keeps
# the level part of the cache key (it lives in `CompilerConfig`), unlike reading the `-g`
# global at parse time (which would bake the wrong level under pkgimage reuse across `-g`).

# Fetch the `julia.gpu.debug_level` intrinsic from `mod`, declaring it first (as a
# zero-argument function returning a 32-bit integer) when the module does not contain it.
# The `readnone` attribute is applied to the function in both cases.
function debug_level_intr(mod::LLVM.Module)
    name = "julia.gpu.debug_level"
    fns = functions(mod)

    decl = if haskey(fns, name)
        fns[name]
    else
        LLVM.Function(mod, name, LLVM.FunctionType(LLVM.Int32Type()))
    end
    push!(function_attributes(decl), EnumAttribute("readnone", 0))

    return decl
end

# generator for `kernel_debug_level`: builds a small LLVM function whose only job is to
# call the `julia.gpu.debug_level` intrinsic and return its `Int32` result, and hands that
# function to `call_function`. the intrinsic call is a placeholder that
# `lower_debug_level!` later replaces with the job's configured `debug_level`.
function kernel_debug_level_value()
    @dispose ctx=Context() begin
        ret_typ = LLVM.Int32Type()

        # wrapper function that will contain the intrinsic call, and its module
        wrapper, _ = create_function(ret_typ)
        wrapper_mod = LLVM.parent(wrapper)

        # intrinsic declaration inside that module
        getter = debug_level_intr(wrapper_mod)
        getter_ft = function_type(getter)

        # body: a single entry block that returns the result of calling the intrinsic
        @dispose irb=IRBuilder() begin
            position!(irb, BasicBlock(wrapper, "entry"))
            ret!(irb, call!(irb, getter_ft, getter, Value[], "debug_level"))
        end

        call_function(wrapper, Int32)
    end
end

# accessor for device code: yields the debug level of the job being compiled as an
# `Int32`. the body is produced by `kernel_debug_level_value`, i.e. a call to the
# `julia.gpu.debug_level` intrinsic that `lower_debug_level!` turns into a constant.
# meant for back-end device runtimes (e.g. to gate exception reporting), not for user
# code; since it takes no back-end-specific argument, a single definition suffices
# (unlike `kernel_state`).
@inline @generated function kernel_debug_level()
    return kernel_debug_level_value()
end
export kernel_debug_level

# substitute the job's configured debug level (as an `i32` constant) for every call to
# the `julia.gpu.debug_level` intrinsic in `mod`, then remove the intrinsic itself.
# returns `true` when the intrinsic was present in the module, `false` otherwise.
function lower_debug_level!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
    name = "julia.gpu.debug_level"
    fns = functions(mod)
    if !haskey(fns, name)
        return false
    end

    intr = fns[name]
    constant = ConstantInt(LLVM.Int32Type(), job.config.debug_level)

    # snapshot the use list up front: erasing calls modifies it during the loop
    for call_use in collect(uses(intr))
        call = user(call_use)
        @assert call isa LLVM.CallInst
        replace_uses!(call, constant)
        erase!(call)
    end

    # every call is gone now, so the declaration can be dropped as well
    @assert isempty(uses(intr))
    erase!(intr)

    return true
end

# convert kernel state argument from pass-by-value to pass-by-reference
#
# the kernel state argument is always passed by value to avoid codegen issues with byval.
Expand Down
Loading