-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Slow compiles with write_volatile to a large array #39001
Comments
FWIW, all of the time is in LLVM, specifically the "Dominator Tree Construction" pass. So it could be considered an LLVM bug, but it's probably caused by rustc emitting weird IR.
|
There's a lot of IR. It's like this:
|
On current stable and nightly, the example now compiles instantly. The optimized IR looks like this:
; test::slow
; Function Attrs: nounwind readnone uwtable
define void @_ZN4test4slow17hb13ef2f0fb17248eE() unnamed_addr #0 {
start:
%_5.i = alloca [4096 x i8], align 1
%buf = alloca [4096 x i8], align 1
%buf.0.sroa_idx1 = getelementptr inbounds [4096 x i8], [4096 x i8]* %buf, i64 0, i64 0
call void @llvm.lifetime.start.p0i8(i64 4096, i8* nonnull %buf.0.sroa_idx1)
%_5.0.sroa_idx2.i = getelementptr inbounds [4096 x i8], [4096 x i8]* %_5.i, i64 0, i64 0
call void @llvm.lifetime.start.p0i8(i64 4096, i8* nonnull %_5.0.sroa_idx2.i)
call void @llvm.memset.p0i8.i64(i8* nonnull align 1 %_5.0.sroa_idx2.i, i8 0, i64 4096, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %buf.0.sroa_idx1, i8* nonnull align 1 %_5.0.sroa_idx2.i, i64 4096, i1 true) #2, !noalias !0
call void @llvm.lifetime.end.p0i8(i64 4096, i8* nonnull %_5.0.sroa_idx2.i)
call void @llvm.lifetime.end.p0i8(i64 4096, i8* nonnull %buf.0.sroa_idx1)
ret void
}
And the unoptimized IR is pretty small as well:
; core::mem::uninitialized
; Function Attrs: inlinehint uwtable
define void @_ZN4core3mem13uninitialized17h4537c4eab9e44a67E([4096 x i8]* noalias nocapture sret dereferenceable(4096)) unnamed_addr #0 {
start:
br label %bb1
bb1: ; preds = %start
ret void
}
; core::mem::zeroed
; Function Attrs: inlinehint uwtable
define void @_ZN4core3mem6zeroed17h4a86ff056d9543abE([4096 x i8]* noalias nocapture sret dereferenceable(4096)) unnamed_addr #0 {
start:
%1 = bitcast [4096 x i8]* %0 to i8*
call void @llvm.memset.p0i8.i64(i8* align 1 %1, i8 0, i64 4096, i1 false)
br label %bb1
bb1: ; preds = %start
ret void
}
; core::ptr::write_volatile
; Function Attrs: inlinehint uwtable
define void @_ZN4core3ptr14write_volatile17h820044959ff57a02E([4096 x i8]* %dst, [4096 x i8]* noalias nocapture dereferenceable(4096) %src) unnamed_addr #0 {
start:
%_5 = alloca [4096 x i8], align 1
%0 = bitcast [4096 x i8]* %src to i8*
%1 = bitcast [4096 x i8]* %_5 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %1, i8* align 1 %0, i64 4096, i1 false)
%2 = bitcast [4096 x i8]* %_5 to i8*
%3 = bitcast [4096 x i8]* %dst to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %3, i8* align 1 %2, i64 4096, i1 true)
br label %bb1
bb1: ; preds = %start
ret void
}
; test::slow
; Function Attrs: uwtable
define void @_ZN4test4slow17hb13ef2f0fb17248eE() unnamed_addr #1 {
start:
%_7 = alloca [4096 x i8], align 1
%buf = alloca [4096 x i8], align 1
; call core::mem::uninitialized
call void @_ZN4core3mem13uninitialized17h4537c4eab9e44a67E([4096 x i8]* noalias nocapture sret dereferenceable(4096) %buf)
br label %bb1
bb1: ; preds = %start
; call core::mem::zeroed
call void @_ZN4core3mem6zeroed17h4a86ff056d9543abE([4096 x i8]* noalias nocapture sret dereferenceable(4096) %_7)
br label %bb2
bb2: ; preds = %bb1
; call core::ptr::write_volatile
call void @_ZN4core3ptr14write_volatile17h820044959ff57a02E([4096 x i8]* %buf, [4096 x i8]* noalias nocapture dereferenceable(4096) %_7)
br label %bb3
bb3: ; preds = %bb2
ret void
}
Closing this as fixed, per the previous comment.
When using `write_volatile` on an array with a large number of elements, it takes a long time to compile.
I tried this code:
With the following command line:
rustc -Z time-passes -O --crate-type lib --emit=asm
The time taken to compile depends on the array size. For sizes up to 3098, it takes less than a second; for sizes of 3099 and above, it takes over a minute.
Meta
`rustc --version --verbose`:
The text was updated successfully, but these errors were encountered: