Fix inline asm register allocator corrupting rbx#1629
Fix inline asm register allocator corrupting rbx#1629CathalMullan wants to merge 1 commit intorust-lang:mainfrom
rbx#1629Conversation
| // Mark `bx` as in-use so the allocator won't corrupt it. | ||
| if self.arch == InlineAsmArch::X86_64 { | ||
| allocated.insert(InlineAsmReg::X86(X86InlineAsmReg::bx), (true, true)); | ||
| } |
There was a problem hiding this comment.
I picked rbx as that isn't allowed as input or output register due to being reserved by LLVM, so I'm surprised bx even gets returned as allocatable register. https://github.com/rust-lang/rust/blob/ba1567989ee7774a1fb53aa680a8e4e8daa0f519/compiler/rustc_target/src/asm/x86.rs#L203-L217
There was a problem hiding this comment.
It's bl which doesn't seem to get filtered: https://github.com/rust-lang/rust/blob/main/compiler/rustc_target/src/asm/x86.rs#L253
Here's the full build logs for Graviola where it gets corrupted:
Details
warning: unsupported x86 llvm intrinsic llvm.x86.avx512.pternlog.d.128; replacing with trap
warning: unsupported x86 llvm intrinsic llvm.x86.avx512.pternlog.d.512; replacing with trap
warning: unsupported x86 llvm intrinsic llvm.x86.avx512.pternlog.q.512; replacing with trap
warning: unsupported x86 llvm intrinsic llvm.x86.avx512.pshuf.b.512; replacing with trap
warning: unsupported x86 llvm intrinsic llvm.x86.aesni.aesenc.512; replacing with trap
warning: unsupported x86 llvm intrinsic llvm.x86.aesni.aesenclast.512; replacing with trap
warning: unsupported x86 llvm intrinsic llvm.x86.pclmulqdq.512; replacing with trap
warning: `graviola` (lib test) generated 7 warnings
error: invalid operand for instruction
|
note: instantiated into assembly here
--> <inline asm>:14:8
|
14 | vpxor y0, y0, y0
| ^
error: invalid operand for instruction
|
note: instantiated into assembly here
--> <inline asm>:17:8
|
17 | vmovdqu [rax], y0
| ^
error: aborting due to 2 previous errors
error: Failed to assemble `.globl _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu11__zero_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0
.type _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu11__zero_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0,@function
.section .text._RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu11__zero_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0,"ax",@progbits
_RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu11__zero_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0:
.intel_syntax noprefix
push rbp
mov rbp,rsp
push rbx
mov rbx,rdi
mov rax, [rbx+0x0]
mov rcx, [rbx+0x8]
vpxor y0, y0, y0
2: cmp rcx, 32
jl 3f
vmovdqu [rax], y0
add rax, 32
sub rcx, 32
jmp 2b
3: sub rcx, 1
jl 4f
mov byte ptr [rax], 0
add rax, 1
jmp 3b
4:
pop rbx
pop rbp
ret
.att_syntax
.size _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu11__zero_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0, .-_RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu11__zero_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0
.text
.globl _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu15leave_cpu_state__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0
.type _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu15leave_cpu_state__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0,@function
.section .text._RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu15leave_cpu_state__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0,"ax",@progbits
_RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu15leave_cpu_state__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0:
.intel_syntax noprefix
push rbp
mov rbp,rsp
push rbx
mov rbx,rdi
vzeroall
pop rbx
pop rbp
ret
.att_syntax
.size _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu15leave_cpu_state__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0, .-_RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu15leave_cpu_state__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0
.text
.globl _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu16ct_compare_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0
.type _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu16ct_compare_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0,@function
.section .text._RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu16ct_compare_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0,"ax",@progbits
_RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu16ct_compare_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0:
.intel_syntax noprefix
push rbp
mov rbp,rsp
push rbx
mov rbx,rdi
mov [rbx+0x0], bl
mov rax, [rbx+0x8]
mov rcx, [rbx+0x10]
mov rdx, [rbx+0x18]
mov bl, [rbx+0x20]
mov sil, [rbx+0x1]
2: cmp rdx, 0
je 3f
mov bl, [rax]
xor bl, [rcx]
or sil, bl
add rax, 1
add rcx, 1
sub rdx, 1
jmp 2b
3:
mov [rbx+0x1], sil
mov bl, [rbx+0x0]
pop rbx
pop rbp
ret
.att_syntax
.size _RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu16ct_compare_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0, .-_RNvNtNtNtCs1fs6YDMng9x_8graviola3low6x86_643cpu16ct_compare_bytes__inline_asm_9p5s6xwqy0njxb3man23ssxa5_n0
.text
`
warning: `graviola` (lib) generated 7 warnings (7 duplicates)
error: could not compile `graviola` (lib) due to 1 previous error; 7 warnings emitted
There was a problem hiding this comment.
I think the issue is that bl and bh are missing % rbx_reserved at https://github.com/rust-lang/rust/blob/8ddf4ef064fb702fed0f3d239ec8d0bac607484e/compiler/rustc_target/src/asm/x86.rs#L253-L254 This would need to be fixed on the rustc side.
x86: reserve `bl` and `bh` registers to match `rbx` `bl` and `bh` need to be explicitly marked as reserved, as they are sub-registers of `rbx`, which is reserved by LLVM. Discovered this while trying to run Graviola through Cranelift, which was becoming corrupted due to the register allocator assigning `bl`: rust-lang/rustc_codegen_cranelift#1629 cc: @bjorn3
Rollup merge of #153302 - CathalMullan:rbx, r=Amanieu x86: reserve `bl` and `bh` registers to match `rbx` `bl` and `bh` need to be explicitly marked as reserved, as they are sub-registers of `rbx`, which is reserved by LLVM. Discovered this while trying to run Graviola through Cranelift, which was becoming corrupted due to the register allocator assigning `bl`: rust-lang/rustc_codegen_cranelift#1629 cc: @bjorn3
The register allocator can assign overlapping registers of
rbx, corrupting the pointer used by the inline asm wrapper.This fix marks
bxas in-use to prevent this.Encountered this while trying to run the Graviola test suite.
With this fix (and #1628, ctz/graviola#150), the entire Graviola test suite passes on x86_64, though 4 tests fail on AVX-512 hosts due to missing intrinsics.