-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Redundant field read in Vec::clear() #51802
Comments
On while 0 < len {
len -= 1;
} Maybe we should mark |
@newpavlov No, it's not that, and the function is clearly inlined. I think I'm not sure it's correct (there might be an off-by-one, and I'm not sure if there's any issue with EDIT: nope, my proposal doesn't work, because it needs to handle panics. |
Hm, the only difference is that in |
Yeah, well, that's what I hoped for, but This might need to be fixed in LLVM. |
Maybe some kind of |
…chton Use SetLenOnDrop in Vec::truncate() This avoids a redundant length check in some cases when calling `Vec::truncate` or `Vec::clear`. Fixes rust-lang#51802 Note that the generated code still seems suboptimal. I tested with the following functions: ```rust #[no_mangle] pub extern fn foo(x: &mut Vec<u8>) { x.clear(); } #[no_mangle] pub extern fn bar(x: &mut Vec<u8>) { x.truncate(5); } #[no_mangle] pub extern fn baz(x: &mut Vec<u8>, n: usize) { x.truncate(n); } #[no_mangle] pub extern fn foo_string(x: &mut Vec<String>) { x.clear(); } #[no_mangle] pub extern fn bar_string(x: &mut Vec<String>) { x.truncate(5); } #[no_mangle] pub extern fn baz_string(x: &mut Vec<String>, n: usize) { x.truncate(n); } ``` <details> <summary>Old output</summary> ```asm 00000000000460a0 <foo>: 460a0: 48 83 7f 10 00 cmpq $0x0,0x10(%rdi) 460a5: 74 08 je 460af <foo+0xf> 460a7: 48 c7 47 10 00 00 00 movq $0x0,0x10(%rdi) 460ae: 00 460af: c3 retq 00000000000460b0 <bar>: 460b0: 48 83 7f 10 06 cmpq $0x6,0x10(%rdi) 460b5: 72 08 jb 460bf <bar+0xf> 460b7: 48 c7 47 10 05 00 00 movq $0x5,0x10(%rdi) 460be: 00 460bf: c3 retq 00000000000460c0 <baz>: 460c0: 48 39 77 10 cmp %rsi,0x10(%rdi) 460c4: 76 04 jbe 460ca <baz+0xa> 460c6: 48 89 77 10 mov %rsi,0x10(%rdi) 460ca: c3 retq 460cb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 00000000000460d0 <foo_string>: 460d0: 41 57 push %r15 460d2: 41 56 push %r14 460d4: 53 push %rbx 460d5: 48 8b 47 10 mov 0x10(%rdi),%rax 460d9: 48 85 c0 test %rax,%rax 460dc: 74 4a je 46128 <foo_string+0x58> 460de: 49 89 fe mov %rdi,%r14 460e1: 48 8b 0f mov (%rdi),%rcx 460e4: 48 8d 14 40 lea (%rax,%rax,2),%rdx 460e8: 48 8d 58 ff lea -0x1(%rax),%rbx 460ec: 4c 8d 3c d1 lea (%rcx,%rdx,8),%r15 460f0: 49 83 c7 f0 add $0xfffffffffffffff0,%r15 460f4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 460fb: 00 00 00 460fe: 66 90 xchg %ax,%ax 46100: 49 89 5e 10 mov %rbx,0x10(%r14) 46104: 49 8b 37 mov (%r15),%rsi 46107: 48 85 f6 test %rsi,%rsi 4610a: 74 0e je 4611a <foo_string+0x4a> 4610c: 49 8b 7f f8 mov -0x8(%r15),%rdi 46110: ba 01 00 00 00 mov $0x1,%edx 46115: e8 a6 e9 ff ff callq 44ac0 <__rust_dealloc@plt> 4611a: 48 83 c3 ff add $0xffffffffffffffff,%rbx 4611e: 49 83 c7 e8 add $0xffffffffffffffe8,%r15 46122: 48 83 fb ff cmp $0xffffffffffffffff,%rbx 46126: 75 d8 jne 46100 <foo_string+0x30> 46128: 5b pop %rbx 46129: 41 5e pop %r14 4612b: 41 5f pop %r15 4612d: c3 retq 4612e: 66 90 xchg %ax,%ax 0000000000046130 <bar_string>: 46130: 41 57 push %r15 46132: 41 56 push %r14 46134: 53 push %rbx 46135: 4c 8b 7f 10 mov 0x10(%rdi),%r15 46139: 49 83 ff 06 cmp $0x6,%r15 4613d: 72 49 jb 46188 <bar_string+0x58> 4613f: 49 89 fe mov %rdi,%r14 46142: 48 8b 07 mov (%rdi),%rax 46145: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 46149: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 4614d: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 46151: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 46158: 00 00 00 4615b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 46160: 49 83 c7 ff add $0xffffffffffffffff,%r15 46164: 4d 89 7e 10 mov %r15,0x10(%r14) 46168: 48 8b 33 mov (%rbx),%rsi 4616b: 48 85 f6 test %rsi,%rsi 4616e: 74 0e je 4617e <bar_string+0x4e> 46170: 48 8b 7b f8 mov -0x8(%rbx),%rdi 46174: ba 01 00 00 00 mov $0x1,%edx 46179: e8 42 e9 ff ff callq 44ac0 <__rust_dealloc@plt> 4617e: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 46182: 49 83 ff 05 cmp $0x5,%r15 46186: 77 d8 ja 46160 <bar_string+0x30> 46188: 5b pop %rbx 46189: 41 5e pop %r14 4618b: 41 5f pop %r15 4618d: c3 retq 4618e: 66 90 xchg %ax,%ax 0000000000046190 <baz_string>: 46190: 41 57 push %r15 46192: 41 56 push %r14 46194: 41 54 push %r12 46196: 53 push %rbx 46197: 50 push %rax 46198: 4c 8b 67 10 mov 0x10(%rdi),%r12 4619c: 49 39 f4 cmp %rsi,%r12 4619f: 76 46 jbe 461e7 <baz_string+0x57> 461a1: 49 89 f6 mov %rsi,%r14 461a4: 49 89 ff mov %rdi,%r15 461a7: 48 8b 07 mov (%rdi),%rax 461aa: 4b 8d 0c 64 lea (%r12,%r12,2),%rcx 461ae: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 461b2: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 461b6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 461bd: 00 00 00 461c0: 49 83 c4 ff add $0xffffffffffffffff,%r12 461c4: 4d 89 67 10 mov %r12,0x10(%r15) 461c8: 48 8b 33 mov (%rbx),%rsi 461cb: 48 85 f6 test %rsi,%rsi 461ce: 74 0e je 461de <baz_string+0x4e> 461d0: 48 8b 7b f8 mov -0x8(%rbx),%rdi 461d4: ba 01 00 00 00 mov $0x1,%edx 461d9: e8 e2 e8 ff ff callq 44ac0 <__rust_dealloc@plt> 461de: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 461e2: 4d 39 f4 cmp %r14,%r12 461e5: 77 d9 ja 461c0 <baz_string+0x30> 461e7: 48 83 c4 08 add $0x8,%rsp 461eb: 5b pop %rbx 461ec: 41 5c pop %r12 461ee: 41 5e pop %r14 461f0: 41 5f pop %r15 461f2: c3 retq 461f3: 90 nop 461f4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 461fb: 00 00 00 461fe: 66 90 xchg %ax,%ax ``` </details> <details> <summary>New output</summary> ```asm 0000000000084d10 <foo>: 84d10: 48 c7 47 10 00 00 00 movq $0x0,0x10(%rdi) 84d17: 00 84d18: c3 retq 84d19: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 0000000000084d20 <bar>: 84d20: 48 8b 47 10 mov 0x10(%rdi),%rax 84d24: 48 83 f8 05 cmp $0x5,%rax 84d28: b9 05 00 00 00 mov $0x5,%ecx 84d2d: 48 0f 42 c8 cmovb %rax,%rcx 84d31: 48 89 4f 10 mov %rcx,0x10(%rdi) 84d35: c3 retq 84d36: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84d3d: 00 00 00 0000000000084d40 <baz>: 84d40: 48 8b 47 10 mov 0x10(%rdi),%rax 84d44: 48 39 f0 cmp %rsi,%rax 84d47: 48 0f 47 c6 cmova %rsi,%rax 84d4b: 48 89 47 10 mov %rax,0x10(%rdi) 84d4f: c3 retq 0000000000084d50 <foo_string>: 84d50: 41 57 push %r15 84d52: 41 56 push %r14 84d54: 53 push %rbx 84d55: 49 89 fe mov %rdi,%r14 84d58: 4c 8b 7f 10 mov 0x10(%rdi),%r15 84d5c: 4d 85 ff test %r15,%r15 84d5f: 74 2f je 84d90 <foo_string+0x40> 84d61: 49 8b 06 mov (%r14),%rax 84d64: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 84d68: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 84d6c: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84d70: 48 8b 33 mov (%rbx),%rsi 84d73: 48 85 f6 test %rsi,%rsi 84d76: 74 0e je 84d86 <foo_string+0x36> 84d78: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84d7c: ba 01 00 00 00 mov $0x1,%edx 84d81: e8 1a b1 ff ff callq 7fea0 <__rust_dealloc@plt> 84d86: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84d8a: 49 83 c7 ff add $0xffffffffffffffff,%r15 84d8e: 75 e0 jne 84d70 <foo_string+0x20> 84d90: 49 c7 46 10 00 00 00 movq $0x0,0x10(%r14) 84d97: 00 84d98: 5b pop %rbx 84d99: 41 5e pop %r14 84d9b: 41 5f pop %r15 84d9d: c3 retq 84d9e: 66 90 xchg %ax,%ax 0000000000084da0 <bar_string>: 84da0: 41 57 push %r15 84da2: 41 56 push %r14 84da4: 53 push %rbx 84da5: 49 89 fe mov %rdi,%r14 84da8: 4c 8b 7f 10 mov 0x10(%rdi),%r15 84dac: 49 83 ff 06 cmp $0x6,%r15 84db0: 72 44 jb 84df6 <bar_string+0x56> 84db2: 49 8b 06 mov (%r14),%rax 84db5: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 84db9: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 84dbd: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84dc1: 49 83 c7 fb add $0xfffffffffffffffb,%r15 84dc5: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84dcc: 00 00 00 84dcf: 90 nop 84dd0: 48 8b 33 mov (%rbx),%rsi 84dd3: 48 85 f6 test %rsi,%rsi 84dd6: 74 0e je 84de6 <bar_string+0x46> 84dd8: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84ddc: ba 01 00 00 00 mov $0x1,%edx 84de1: e8 ba b0 ff ff callq 7fea0 <__rust_dealloc@plt> 84de6: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84dea: 49 83 c7 ff add $0xffffffffffffffff,%r15 84dee: 75 e0 jne 84dd0 <bar_string+0x30> 84df0: 41 bf 05 00 00 00 mov $0x5,%r15d 84df6: 4d 89 7e 10 mov %r15,0x10(%r14) 84dfa: 5b pop %rbx 84dfb: 41 5e pop %r14 84dfd: 41 5f pop %r15 84dff: c3 retq 0000000000084e00 <baz_string>: 84e00: 41 57 push %r15 84e02: 41 56 push %r14 84e04: 41 54 push %r12 84e06: 53 push %rbx 84e07: 50 push %rax 84e08: 49 89 ff mov %rdi,%r15 84e0b: 48 8b 47 10 mov 0x10(%rdi),%rax 84e0f: 49 89 c4 mov %rax,%r12 84e12: 49 29 f4 sub %rsi,%r12 84e15: 76 3c jbe 84e53 <baz_string+0x53> 84e17: 49 89 f6 mov %rsi,%r14 84e1a: 49 8b 0f mov (%r15),%rcx 84e1d: 48 8d 04 40 lea (%rax,%rax,2),%rax 84e21: 48 8d 1c c1 lea (%rcx,%rax,8),%rbx 84e25: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84e29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 84e30: 48 8b 33 mov (%rbx),%rsi 84e33: 48 85 f6 test %rsi,%rsi 84e36: 74 0e je 84e46 <baz_string+0x46> 84e38: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84e3c: ba 01 00 00 00 mov $0x1,%edx 84e41: e8 5a b0 ff ff callq 7fea0 <__rust_dealloc@plt> 84e46: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84e4a: 49 83 c4 ff add $0xffffffffffffffff,%r12 84e4e: 75 e0 jne 84e30 <baz_string+0x30> 84e50: 4c 89 f0 mov %r14,%rax 84e53: 49 89 47 10 mov %rax,0x10(%r15) 84e57: 48 83 c4 08 add $0x8,%rsp 84e5b: 5b pop %rbx 84e5c: 41 5c pop %r12 84e5e: 41 5e pop %r14 84e60: 41 5f pop %r15 84e62: c3 retq 84e63: 90 nop 84e64: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84e6b: 00 00 00 84e6e: 66 90 xchg %ax,%ax ``` </details> For calling `truncate` with non-zero lengths on non-`Drop` types, it seems that a redundant load and comparison gets replaced with an awkward sequence with a conditional move. In the unknown length case, the new code is no longer awkward. Maybe someone moderately proficient at assembly could tell if this looks like a win or not. --- This came up when discussing replacing `unsafe { vec.set_len(0) }` with `vec.clear()` in a project where the author was worried about potential performance degradation. It might be worth replacing some unsafe code, even it it's trivial to see that it's actually safe.
…chton Use SetLenOnDrop in Vec::truncate() This avoids a redundant length check in some cases when calling `Vec::truncate` or `Vec::clear`. Fixes rust-lang#51802 Note that the generated code still seems suboptimal. I tested with the following functions: ```rust #[no_mangle] pub extern fn foo(x: &mut Vec<u8>) { x.clear(); } #[no_mangle] pub extern fn bar(x: &mut Vec<u8>) { x.truncate(5); } #[no_mangle] pub extern fn baz(x: &mut Vec<u8>, n: usize) { x.truncate(n); } #[no_mangle] pub extern fn foo_string(x: &mut Vec<String>) { x.clear(); } #[no_mangle] pub extern fn bar_string(x: &mut Vec<String>) { x.truncate(5); } #[no_mangle] pub extern fn baz_string(x: &mut Vec<String>, n: usize) { x.truncate(n); } ``` <details> <summary>Old output</summary> ```asm 00000000000460a0 <foo>: 460a0: 48 83 7f 10 00 cmpq $0x0,0x10(%rdi) 460a5: 74 08 je 460af <foo+0xf> 460a7: 48 c7 47 10 00 00 00 movq $0x0,0x10(%rdi) 460ae: 00 460af: c3 retq 00000000000460b0 <bar>: 460b0: 48 83 7f 10 06 cmpq $0x6,0x10(%rdi) 460b5: 72 08 jb 460bf <bar+0xf> 460b7: 48 c7 47 10 05 00 00 movq $0x5,0x10(%rdi) 460be: 00 460bf: c3 retq 00000000000460c0 <baz>: 460c0: 48 39 77 10 cmp %rsi,0x10(%rdi) 460c4: 76 04 jbe 460ca <baz+0xa> 460c6: 48 89 77 10 mov %rsi,0x10(%rdi) 460ca: c3 retq 460cb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 00000000000460d0 <foo_string>: 460d0: 41 57 push %r15 460d2: 41 56 push %r14 460d4: 53 push %rbx 460d5: 48 8b 47 10 mov 0x10(%rdi),%rax 460d9: 48 85 c0 test %rax,%rax 460dc: 74 4a je 46128 <foo_string+0x58> 460de: 49 89 fe mov %rdi,%r14 460e1: 48 8b 0f mov (%rdi),%rcx 460e4: 48 8d 14 40 lea (%rax,%rax,2),%rdx 460e8: 48 8d 58 ff lea -0x1(%rax),%rbx 460ec: 4c 8d 3c d1 lea (%rcx,%rdx,8),%r15 460f0: 49 83 c7 f0 add $0xfffffffffffffff0,%r15 460f4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 460fb: 00 00 00 460fe: 66 90 xchg %ax,%ax 46100: 49 89 5e 10 mov %rbx,0x10(%r14) 46104: 49 8b 37 mov (%r15),%rsi 46107: 48 85 f6 test %rsi,%rsi 4610a: 74 0e je 4611a <foo_string+0x4a> 4610c: 49 8b 7f f8 mov -0x8(%r15),%rdi 46110: ba 01 00 00 00 mov $0x1,%edx 46115: e8 a6 e9 ff ff callq 44ac0 <__rust_dealloc@plt> 4611a: 48 83 c3 ff add $0xffffffffffffffff,%rbx 4611e: 49 83 c7 e8 add $0xffffffffffffffe8,%r15 46122: 48 83 fb ff cmp $0xffffffffffffffff,%rbx 46126: 75 d8 jne 46100 <foo_string+0x30> 46128: 5b pop %rbx 46129: 41 5e pop %r14 4612b: 41 5f pop %r15 4612d: c3 retq 4612e: 66 90 xchg %ax,%ax 0000000000046130 <bar_string>: 46130: 41 57 push %r15 46132: 41 56 push %r14 46134: 53 push %rbx 46135: 4c 8b 7f 10 mov 0x10(%rdi),%r15 46139: 49 83 ff 06 cmp $0x6,%r15 4613d: 72 49 jb 46188 <bar_string+0x58> 4613f: 49 89 fe mov %rdi,%r14 46142: 48 8b 07 mov (%rdi),%rax 46145: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 46149: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 4614d: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 46151: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 46158: 00 00 00 4615b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 46160: 49 83 c7 ff add $0xffffffffffffffff,%r15 46164: 4d 89 7e 10 mov %r15,0x10(%r14) 46168: 48 8b 33 mov (%rbx),%rsi 4616b: 48 85 f6 test %rsi,%rsi 4616e: 74 0e je 4617e <bar_string+0x4e> 46170: 48 8b 7b f8 mov -0x8(%rbx),%rdi 46174: ba 01 00 00 00 mov $0x1,%edx 46179: e8 42 e9 ff ff callq 44ac0 <__rust_dealloc@plt> 4617e: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 46182: 49 83 ff 05 cmp $0x5,%r15 46186: 77 d8 ja 46160 <bar_string+0x30> 46188: 5b pop %rbx 46189: 41 5e pop %r14 4618b: 41 5f pop %r15 4618d: c3 retq 4618e: 66 90 xchg %ax,%ax 0000000000046190 <baz_string>: 46190: 41 57 push %r15 46192: 41 56 push %r14 46194: 41 54 push %r12 46196: 53 push %rbx 46197: 50 push %rax 46198: 4c 8b 67 10 mov 0x10(%rdi),%r12 4619c: 49 39 f4 cmp %rsi,%r12 4619f: 76 46 jbe 461e7 <baz_string+0x57> 461a1: 49 89 f6 mov %rsi,%r14 461a4: 49 89 ff mov %rdi,%r15 461a7: 48 8b 07 mov (%rdi),%rax 461aa: 4b 8d 0c 64 lea (%r12,%r12,2),%rcx 461ae: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 461b2: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 461b6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 461bd: 00 00 00 461c0: 49 83 c4 ff add $0xffffffffffffffff,%r12 461c4: 4d 89 67 10 mov %r12,0x10(%r15) 461c8: 48 8b 33 mov (%rbx),%rsi 461cb: 48 85 f6 test %rsi,%rsi 461ce: 74 0e je 461de <baz_string+0x4e> 461d0: 48 8b 7b f8 mov -0x8(%rbx),%rdi 461d4: ba 01 00 00 00 mov $0x1,%edx 461d9: e8 e2 e8 ff ff callq 44ac0 <__rust_dealloc@plt> 461de: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 461e2: 4d 39 f4 cmp %r14,%r12 461e5: 77 d9 ja 461c0 <baz_string+0x30> 461e7: 48 83 c4 08 add $0x8,%rsp 461eb: 5b pop %rbx 461ec: 41 5c pop %r12 461ee: 41 5e pop %r14 461f0: 41 5f pop %r15 461f2: c3 retq 461f3: 90 nop 461f4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 461fb: 00 00 00 461fe: 66 90 xchg %ax,%ax ``` </details> <details> <summary>New output</summary> ```asm 0000000000084d10 <foo>: 84d10: 48 c7 47 10 00 00 00 movq $0x0,0x10(%rdi) 84d17: 00 84d18: c3 retq 84d19: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 0000000000084d20 <bar>: 84d20: 48 8b 47 10 mov 0x10(%rdi),%rax 84d24: 48 83 f8 05 cmp $0x5,%rax 84d28: b9 05 00 00 00 mov $0x5,%ecx 84d2d: 48 0f 42 c8 cmovb %rax,%rcx 84d31: 48 89 4f 10 mov %rcx,0x10(%rdi) 84d35: c3 retq 84d36: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84d3d: 00 00 00 0000000000084d40 <baz>: 84d40: 48 8b 47 10 mov 0x10(%rdi),%rax 84d44: 48 39 f0 cmp %rsi,%rax 84d47: 48 0f 47 c6 cmova %rsi,%rax 84d4b: 48 89 47 10 mov %rax,0x10(%rdi) 84d4f: c3 retq 0000000000084d50 <foo_string>: 84d50: 41 57 push %r15 84d52: 41 56 push %r14 84d54: 53 push %rbx 84d55: 49 89 fe mov %rdi,%r14 84d58: 4c 8b 7f 10 mov 0x10(%rdi),%r15 84d5c: 4d 85 ff test %r15,%r15 84d5f: 74 2f je 84d90 <foo_string+0x40> 84d61: 49 8b 06 mov (%r14),%rax 84d64: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 84d68: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 84d6c: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84d70: 48 8b 33 mov (%rbx),%rsi 84d73: 48 85 f6 test %rsi,%rsi 84d76: 74 0e je 84d86 <foo_string+0x36> 84d78: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84d7c: ba 01 00 00 00 mov $0x1,%edx 84d81: e8 1a b1 ff ff callq 7fea0 <__rust_dealloc@plt> 84d86: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84d8a: 49 83 c7 ff add $0xffffffffffffffff,%r15 84d8e: 75 e0 jne 84d70 <foo_string+0x20> 84d90: 49 c7 46 10 00 00 00 movq $0x0,0x10(%r14) 84d97: 00 84d98: 5b pop %rbx 84d99: 41 5e pop %r14 84d9b: 41 5f pop %r15 84d9d: c3 retq 84d9e: 66 90 xchg %ax,%ax 0000000000084da0 <bar_string>: 84da0: 41 57 push %r15 84da2: 41 56 push %r14 84da4: 53 push %rbx 84da5: 49 89 fe mov %rdi,%r14 84da8: 4c 8b 7f 10 mov 0x10(%rdi),%r15 84dac: 49 83 ff 06 cmp $0x6,%r15 84db0: 72 44 jb 84df6 <bar_string+0x56> 84db2: 49 8b 06 mov (%r14),%rax 84db5: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 84db9: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 84dbd: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84dc1: 49 83 c7 fb add $0xfffffffffffffffb,%r15 84dc5: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84dcc: 00 00 00 84dcf: 90 nop 84dd0: 48 8b 33 mov (%rbx),%rsi 84dd3: 48 85 f6 test %rsi,%rsi 84dd6: 74 0e je 84de6 <bar_string+0x46> 84dd8: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84ddc: ba 01 00 00 00 mov $0x1,%edx 84de1: e8 ba b0 ff ff callq 7fea0 <__rust_dealloc@plt> 84de6: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84dea: 49 83 c7 ff add $0xffffffffffffffff,%r15 84dee: 75 e0 jne 84dd0 <bar_string+0x30> 84df0: 41 bf 05 00 00 00 mov $0x5,%r15d 84df6: 4d 89 7e 10 mov %r15,0x10(%r14) 84dfa: 5b pop %rbx 84dfb: 41 5e pop %r14 84dfd: 41 5f pop %r15 84dff: c3 retq 0000000000084e00 <baz_string>: 84e00: 41 57 push %r15 84e02: 41 56 push %r14 84e04: 41 54 push %r12 84e06: 53 push %rbx 84e07: 50 push %rax 84e08: 49 89 ff mov %rdi,%r15 84e0b: 48 8b 47 10 mov 0x10(%rdi),%rax 84e0f: 49 89 c4 mov %rax,%r12 84e12: 49 29 f4 sub %rsi,%r12 84e15: 76 3c jbe 84e53 <baz_string+0x53> 84e17: 49 89 f6 mov %rsi,%r14 84e1a: 49 8b 0f mov (%r15),%rcx 84e1d: 48 8d 04 40 lea (%rax,%rax,2),%rax 84e21: 48 8d 1c c1 lea (%rcx,%rax,8),%rbx 84e25: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84e29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 84e30: 48 8b 33 mov (%rbx),%rsi 84e33: 48 85 f6 test %rsi,%rsi 84e36: 74 0e je 84e46 <baz_string+0x46> 84e38: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84e3c: ba 01 00 00 00 mov $0x1,%edx 84e41: e8 5a b0 ff ff callq 7fea0 <__rust_dealloc@plt> 84e46: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84e4a: 49 83 c4 ff add $0xffffffffffffffff,%r12 84e4e: 75 e0 jne 84e30 <baz_string+0x30> 84e50: 4c 89 f0 mov %r14,%rax 84e53: 49 89 47 10 mov %rax,0x10(%r15) 84e57: 48 83 c4 08 add $0x8,%rsp 84e5b: 5b pop %rbx 84e5c: 41 5c pop %r12 84e5e: 41 5e pop %r14 84e60: 41 5f pop %r15 84e62: c3 retq 84e63: 90 nop 84e64: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84e6b: 00 00 00 84e6e: 66 90 xchg %ax,%ax ``` </details> For calling `truncate` with non-zero lengths on non-`Drop` types, it seems that a redundant load and comparison gets replaced with an awkward sequence with a conditional move. In the unknown length case, the new code is no longer awkward. Maybe someone moderately proficient at assembly could tell if this looks like a win or not. --- This came up when discussing replacing `unsafe { vec.set_len(0) }` with `vec.clear()` in a project where the author was worried about potential performance degradation. It might be worth replacing some unsafe code, even it it's trivial to see that it's actually safe.
…chton Use SetLenOnDrop in Vec::truncate() This avoids a redundant length check in some cases when calling `Vec::truncate` or `Vec::clear`. Fixes rust-lang#51802 Note that the generated code still seems suboptimal. I tested with the following functions: ```rust #[no_mangle] pub extern fn foo(x: &mut Vec<u8>) { x.clear(); } #[no_mangle] pub extern fn bar(x: &mut Vec<u8>) { x.truncate(5); } #[no_mangle] pub extern fn baz(x: &mut Vec<u8>, n: usize) { x.truncate(n); } #[no_mangle] pub extern fn foo_string(x: &mut Vec<String>) { x.clear(); } #[no_mangle] pub extern fn bar_string(x: &mut Vec<String>) { x.truncate(5); } #[no_mangle] pub extern fn baz_string(x: &mut Vec<String>, n: usize) { x.truncate(n); } ``` <details> <summary>Old output</summary> ```asm 00000000000460a0 <foo>: 460a0: 48 83 7f 10 00 cmpq $0x0,0x10(%rdi) 460a5: 74 08 je 460af <foo+0xf> 460a7: 48 c7 47 10 00 00 00 movq $0x0,0x10(%rdi) 460ae: 00 460af: c3 retq 00000000000460b0 <bar>: 460b0: 48 83 7f 10 06 cmpq $0x6,0x10(%rdi) 460b5: 72 08 jb 460bf <bar+0xf> 460b7: 48 c7 47 10 05 00 00 movq $0x5,0x10(%rdi) 460be: 00 460bf: c3 retq 00000000000460c0 <baz>: 460c0: 48 39 77 10 cmp %rsi,0x10(%rdi) 460c4: 76 04 jbe 460ca <baz+0xa> 460c6: 48 89 77 10 mov %rsi,0x10(%rdi) 460ca: c3 retq 460cb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 00000000000460d0 <foo_string>: 460d0: 41 57 push %r15 460d2: 41 56 push %r14 460d4: 53 push %rbx 460d5: 48 8b 47 10 mov 0x10(%rdi),%rax 460d9: 48 85 c0 test %rax,%rax 460dc: 74 4a je 46128 <foo_string+0x58> 460de: 49 89 fe mov %rdi,%r14 460e1: 48 8b 0f mov (%rdi),%rcx 460e4: 48 8d 14 40 lea (%rax,%rax,2),%rdx 460e8: 48 8d 58 ff lea -0x1(%rax),%rbx 460ec: 4c 8d 3c d1 lea (%rcx,%rdx,8),%r15 460f0: 49 83 c7 f0 add $0xfffffffffffffff0,%r15 460f4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 460fb: 00 00 00 460fe: 66 90 xchg %ax,%ax 46100: 49 89 5e 10 mov %rbx,0x10(%r14) 46104: 49 8b 37 mov (%r15),%rsi 46107: 48 85 f6 test %rsi,%rsi 4610a: 74 0e je 4611a <foo_string+0x4a> 4610c: 49 8b 7f f8 mov -0x8(%r15),%rdi 46110: ba 01 00 00 00 mov $0x1,%edx 46115: e8 a6 e9 ff ff callq 44ac0 <__rust_dealloc@plt> 4611a: 48 83 c3 ff add $0xffffffffffffffff,%rbx 4611e: 49 83 c7 e8 add $0xffffffffffffffe8,%r15 46122: 48 83 fb ff cmp $0xffffffffffffffff,%rbx 46126: 75 d8 jne 46100 <foo_string+0x30> 46128: 5b pop %rbx 46129: 41 5e pop %r14 4612b: 41 5f pop %r15 4612d: c3 retq 4612e: 66 90 xchg %ax,%ax 0000000000046130 <bar_string>: 46130: 41 57 push %r15 46132: 41 56 push %r14 46134: 53 push %rbx 46135: 4c 8b 7f 10 mov 0x10(%rdi),%r15 46139: 49 83 ff 06 cmp $0x6,%r15 4613d: 72 49 jb 46188 <bar_string+0x58> 4613f: 49 89 fe mov %rdi,%r14 46142: 48 8b 07 mov (%rdi),%rax 46145: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 46149: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 4614d: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 46151: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 46158: 00 00 00 4615b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 46160: 49 83 c7 ff add $0xffffffffffffffff,%r15 46164: 4d 89 7e 10 mov %r15,0x10(%r14) 46168: 48 8b 33 mov (%rbx),%rsi 4616b: 48 85 f6 test %rsi,%rsi 4616e: 74 0e je 4617e <bar_string+0x4e> 46170: 48 8b 7b f8 mov -0x8(%rbx),%rdi 46174: ba 01 00 00 00 mov $0x1,%edx 46179: e8 42 e9 ff ff callq 44ac0 <__rust_dealloc@plt> 4617e: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 46182: 49 83 ff 05 cmp $0x5,%r15 46186: 77 d8 ja 46160 <bar_string+0x30> 46188: 5b pop %rbx 46189: 41 5e pop %r14 4618b: 41 5f pop %r15 4618d: c3 retq 4618e: 66 90 xchg %ax,%ax 0000000000046190 <baz_string>: 46190: 41 57 push %r15 46192: 41 56 push %r14 46194: 41 54 push %r12 46196: 53 push %rbx 46197: 50 push %rax 46198: 4c 8b 67 10 mov 0x10(%rdi),%r12 4619c: 49 39 f4 cmp %rsi,%r12 4619f: 76 46 jbe 461e7 <baz_string+0x57> 461a1: 49 89 f6 mov %rsi,%r14 461a4: 49 89 ff mov %rdi,%r15 461a7: 48 8b 07 mov (%rdi),%rax 461aa: 4b 8d 0c 64 lea (%r12,%r12,2),%rcx 461ae: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 461b2: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 461b6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 461bd: 00 00 00 461c0: 49 83 c4 ff add $0xffffffffffffffff,%r12 461c4: 4d 89 67 10 mov %r12,0x10(%r15) 461c8: 48 8b 33 mov (%rbx),%rsi 461cb: 48 85 f6 test %rsi,%rsi 461ce: 74 0e je 461de <baz_string+0x4e> 461d0: 48 8b 7b f8 mov -0x8(%rbx),%rdi 461d4: ba 01 00 00 00 mov $0x1,%edx 461d9: e8 e2 e8 ff ff callq 44ac0 <__rust_dealloc@plt> 461de: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 461e2: 4d 39 f4 cmp %r14,%r12 461e5: 77 d9 ja 461c0 <baz_string+0x30> 461e7: 48 83 c4 08 add $0x8,%rsp 461eb: 5b pop %rbx 461ec: 41 5c pop %r12 461ee: 41 5e pop %r14 461f0: 41 5f pop %r15 461f2: c3 retq 461f3: 90 nop 461f4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 461fb: 00 00 00 461fe: 66 90 xchg %ax,%ax ``` </details> <details> <summary>New output</summary> ```asm 0000000000084d10 <foo>: 84d10: 48 c7 47 10 00 00 00 movq $0x0,0x10(%rdi) 84d17: 00 84d18: c3 retq 84d19: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 0000000000084d20 <bar>: 84d20: 48 8b 47 10 mov 0x10(%rdi),%rax 84d24: 48 83 f8 05 cmp $0x5,%rax 84d28: b9 05 00 00 00 mov $0x5,%ecx 84d2d: 48 0f 42 c8 cmovb %rax,%rcx 84d31: 48 89 4f 10 mov %rcx,0x10(%rdi) 84d35: c3 retq 84d36: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84d3d: 00 00 00 0000000000084d40 <baz>: 84d40: 48 8b 47 10 mov 0x10(%rdi),%rax 84d44: 48 39 f0 cmp %rsi,%rax 84d47: 48 0f 47 c6 cmova %rsi,%rax 84d4b: 48 89 47 10 mov %rax,0x10(%rdi) 84d4f: c3 retq 0000000000084d50 <foo_string>: 84d50: 41 57 push %r15 84d52: 41 56 push %r14 84d54: 53 push %rbx 84d55: 49 89 fe mov %rdi,%r14 84d58: 4c 8b 7f 10 mov 0x10(%rdi),%r15 84d5c: 4d 85 ff test %r15,%r15 84d5f: 74 2f je 84d90 <foo_string+0x40> 84d61: 49 8b 06 mov (%r14),%rax 84d64: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 84d68: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 84d6c: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84d70: 48 8b 33 mov (%rbx),%rsi 84d73: 48 85 f6 test %rsi,%rsi 84d76: 74 0e je 84d86 <foo_string+0x36> 84d78: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84d7c: ba 01 00 00 00 mov $0x1,%edx 84d81: e8 1a b1 ff ff callq 7fea0 <__rust_dealloc@plt> 84d86: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84d8a: 49 83 c7 ff add $0xffffffffffffffff,%r15 84d8e: 75 e0 jne 84d70 <foo_string+0x20> 84d90: 49 c7 46 10 00 00 00 movq $0x0,0x10(%r14) 84d97: 00 84d98: 5b pop %rbx 84d99: 41 5e pop %r14 84d9b: 41 5f pop %r15 84d9d: c3 retq 84d9e: 66 90 xchg %ax,%ax 0000000000084da0 <bar_string>: 84da0: 41 57 push %r15 84da2: 41 56 push %r14 84da4: 53 push %rbx 84da5: 49 89 fe mov %rdi,%r14 84da8: 4c 8b 7f 10 mov 0x10(%rdi),%r15 84dac: 49 83 ff 06 cmp $0x6,%r15 84db0: 72 44 jb 84df6 <bar_string+0x56> 84db2: 49 8b 06 mov (%r14),%rax 84db5: 4b 8d 0c 7f lea (%r15,%r15,2),%rcx 84db9: 48 8d 1c c8 lea (%rax,%rcx,8),%rbx 84dbd: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84dc1: 49 83 c7 fb add $0xfffffffffffffffb,%r15 84dc5: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84dcc: 00 00 00 84dcf: 90 nop 84dd0: 48 8b 33 mov (%rbx),%rsi 84dd3: 48 85 f6 test %rsi,%rsi 84dd6: 74 0e je 84de6 <bar_string+0x46> 84dd8: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84ddc: ba 01 00 00 00 mov $0x1,%edx 84de1: e8 ba b0 ff ff callq 7fea0 <__rust_dealloc@plt> 84de6: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84dea: 49 83 c7 ff add $0xffffffffffffffff,%r15 84dee: 75 e0 jne 84dd0 <bar_string+0x30> 84df0: 41 bf 05 00 00 00 mov $0x5,%r15d 84df6: 4d 89 7e 10 mov %r15,0x10(%r14) 84dfa: 5b pop %rbx 84dfb: 41 5e pop %r14 84dfd: 41 5f pop %r15 84dff: c3 retq 0000000000084e00 <baz_string>: 84e00: 41 57 push %r15 84e02: 41 56 push %r14 84e04: 41 54 push %r12 84e06: 53 push %rbx 84e07: 50 push %rax 84e08: 49 89 ff mov %rdi,%r15 84e0b: 48 8b 47 10 mov 0x10(%rdi),%rax 84e0f: 49 89 c4 mov %rax,%r12 84e12: 49 29 f4 sub %rsi,%r12 84e15: 76 3c jbe 84e53 <baz_string+0x53> 84e17: 49 89 f6 mov %rsi,%r14 84e1a: 49 8b 0f mov (%r15),%rcx 84e1d: 48 8d 04 40 lea (%rax,%rax,2),%rax 84e21: 48 8d 1c c1 lea (%rcx,%rax,8),%rbx 84e25: 48 83 c3 f0 add $0xfffffffffffffff0,%rbx 84e29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 84e30: 48 8b 33 mov (%rbx),%rsi 84e33: 48 85 f6 test %rsi,%rsi 84e36: 74 0e je 84e46 <baz_string+0x46> 84e38: 48 8b 7b f8 mov -0x8(%rbx),%rdi 84e3c: ba 01 00 00 00 mov $0x1,%edx 84e41: e8 5a b0 ff ff callq 7fea0 <__rust_dealloc@plt> 84e46: 48 83 c3 e8 add $0xffffffffffffffe8,%rbx 84e4a: 49 83 c4 ff add $0xffffffffffffffff,%r12 84e4e: 75 e0 jne 84e30 <baz_string+0x30> 84e50: 4c 89 f0 mov %r14,%rax 84e53: 49 89 47 10 mov %rax,0x10(%r15) 84e57: 48 83 c4 08 add $0x8,%rsp 84e5b: 5b pop %rbx 84e5c: 41 5c pop %r12 84e5e: 41 5e pop %r14 84e60: 41 5f pop %r15 84e62: c3 retq 84e63: 90 nop 84e64: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 84e6b: 00 00 00 84e6e: 66 90 xchg %ax,%ax ``` </details> For calling `truncate` with non-zero lengths on non-`Drop` types, it seems that a redundant load and comparison gets replaced with an awkward sequence with a conditional move. In the unknown length case, the new code is no longer awkward. Maybe someone moderately proficient at assembly could tell if this looks like a win or not. --- This came up when discussing replacing `unsafe { vec.set_len(0) }` with `vec.clear()` in a project where the author was worried about potential performance degradation. It might be worth replacing some unsafe code, even it it's trivial to see that it's actually safe.
See https://play.rust-lang.org/?gist=2754f11f7236754b2121eb9e4c12aa73&version=nightly&mode=release. The length of the
Vec
is read before being written to.I would have expected the peephole optimizer in LLVM to recognize that pattern. Interestingly, it's not a regression; according to https://rust.godbolt.org/, the issue was even present in
rustc 1.0
.The text was updated successfully, but these errors were encountered: