The issue reproduces only if FatPtr
code resides in a separate crate and only if compiled in release configuration.
I tried this code:
The fat_ptr crate's lib.rs:
pub struct FatPtr {
ptr: *mut u8,
len: usize,
}
impl FatPtr {
pub fn new(len: usize) -> FatPtr {
let ptr = Box::into_raw(vec![42u8; len].into_boxed_slice()) as *mut u8;
FatPtr { ptr, len }
}
}
impl std::ops::Deref for FatPtr {
type Target = [u8];
#[inline]
fn deref(&self) -> &[u8] {
unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
}
}
impl std::ops::Drop for FatPtr {
fn drop(&mut self) {
println!("Drop");
}
}
The test crate's main.rs:
use fat_ptr::FatPtr;
fn print(data: &[u8]) {
println!("{:#?}", data);
}
fn main() {
let ptr = FatPtr::new(20);
let data = unsafe { std::slice::from_raw_parts(ptr.as_ptr(), ptr.len()) };
print(data);
}
I expected to see this happen: 42
printed 20 times.
Instead, this happened: 42
printed 20 times in debug build, but not in release. Instead, slice length appears to be corrupted, so println
prints memory dump outside of allocated slice and eventually segfaults. However, if Drop
implementation for FatPtr
is removed, then slice is truncated to a single byte, which is still incorrect, but doesn't crash the process.
If print(data)
call is replaced with println!("{:#?}", data);
directly then everything works as intended even on release builds. If FatPtr
code placed in the same crate as main
then everything works as intended as well. Removing #[inline]
from deref
fixed the issue as well.
This reproduces on Linux and MacOS (haven't tried Windows).
rustc --version --verbose
:
rustc 1.46.0 (04488afe3 2020-08-24)
binary: rustc
commit-hash: 04488afe34512aa4c33566eb16d8c912a3ae04f9
commit-date: 2020-08-24
host: x86_64-apple-darwin
release: 1.46.0
LLVM version: 10.0
Backtrace
N/A
This passes under Miri, and it looks like the invariants for std::slice::from_raw_parts
are being upheld.
-Z unpretty=mir
: https://gist.github.com/jyn514/8cc91dae699c828e7e9164f9de03cd38
I can reproduce this at least as far back as nightly-2018-07-07
(although it's an infinite loop there, not a segfault). So I'd guess this isn't a regression and just never worked.
The slice length returned from FatPtr::deref is considered dead and eliminated
by Dead Argument Elimination, that doesn't seem right since there are two
calls to FatPtr::deref, one that uses returned ptr and the other one that uses
returned length.
Minimized LLVM IR
; ModuleID = 'c.ll'
source_filename = "b.7rcbfp3g-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@anon.7406e8c0c880c9bceef10e4e4055b7c3.1 = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"ab\0A\00" }>, align 1
; Function Attrs: inlinehint nonlazybind uwtable
define internal { [0 x i8]*, i64 } @from_raw_parts(i8* %data, i64 %len) unnamed_addr #0 {
start:
%0 = bitcast i8* %data to [0 x i8]*
%1 = insertvalue { [0 x i8]*, i64 } undef, [0 x i8]* %0, 0
%2 = insertvalue { [0 x i8]*, i64 } %1, i64 %len, 1
ret { [0 x i8]*, i64 } %2
}
; Function Attrs: inlinehint nonlazybind uwtable
define internal i64 @slice_len([0 x i8]* noalias nonnull readonly align 1 %data, i64 %len) unnamed_addr #0 {
start:
ret i64 %len
}
; Function Attrs: inlinehint nonlazybind uwtable
define internal i8* @as_ptr([0 x i8]* noalias nonnull readonly align 1 %data, i64 %len) unnamed_addr #0 {
start:
%0 = bitcast [0 x i8]* %data to i8*
ret i8* %0
}
; Function Attrs: inlinehint nonlazybind uwtable
define internal { [0 x i8]*, i64 } @deref({ i8*, i64 }* noalias readonly align 8 dereferenceable(16) %self) unnamed_addr #0 {
start:
%0 = getelementptr inbounds { i8*, i64 }, { i8*, i64 }* %self, i64 0, i32 0
%_2 = load i8*, i8** %0, align 8
%1 = getelementptr inbounds { i8*, i64 }, { i8*, i64 }* %self, i64 0, i32 1
%_3 = load i64, i64* %1, align 8
%2 = call { [0 x i8]*, i64 } @from_raw_parts(i8* %_2, i64 %_3)
ret { [0 x i8]*, i64 } %2
}
; Function Attrs: noinline norecurse nounwind nonlazybind optnone readnone uwtable
define { i8*, i64 } @crate() unnamed_addr #2 {
ret { i8*, i64 } { i8* getelementptr inbounds (<{ [4 x i8] }>, <{ [4 x i8] }>* @anon.7406e8c0c880c9bceef10e4e4055b7c3.1, i64 0, i32 0, i64 0), i64 3 }
}
; Function Attrs: nonlazybind
declare dso_local i64 @write(i32, i8* nocapture readonly, i64) local_unnamed_addr #3
; Function Attrs: nonlazybind
define i32 @main(i32 %argc, i8** %argv) unnamed_addr #3 {
start:
%ptr = alloca { i8*, i64 }, align 8
%0 = call { i8*, i64 } @crate()
store { i8*, i64 } %0, { i8*, i64 }* %ptr, align 8
%1 = call { [0 x i8]*, i64 } @deref({ i8*, i64 }* noalias readonly align 8 dereferenceable(16) %ptr)
%_1.0 = extractvalue { [0 x i8]*, i64 } %1, 0
%_1.1 = extractvalue { [0 x i8]*, i64 } %1, 1
%2 = call i8* @as_ptr([0 x i8]* noalias nonnull readonly align 1 %_1.0, i64 %_1.1)
%3 = call { [0 x i8]*, i64 } @deref({ i8*, i64 }* noalias readonly align 8 dereferenceable(16) %ptr)
%_3.0 = extractvalue { [0 x i8]*, i64 } %3, 0
%_3.1 = extractvalue { [0 x i8]*, i64 } %3, 1
%4 = call i64 @slice_len([0 x i8]* noalias nonnull readonly align 1 %_3.0, i64 %_3.1)
%5 = call { [0 x i8]*, i64 } @from_raw_parts(i8* %2, i64 %4)
%_5.0 = extractvalue { [0 x i8]*, i64 } %5, 0
%_5.1 = extractvalue { [0 x i8]*, i64 } %5, 1
%6 = bitcast [0 x i8]* %_5.0 to i8*
%7 = call i64 @write(i32 1, i8* nonnull %6, i64 %_5.1)
ret i32 0
}
attributes #0 = { inlinehint nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #1 = { nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #2 = { noinline norecurse nounwind nonlazybind optnone readnone uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #3 = { nonlazybind "target-cpu"="x86-64" }
!llvm.module.flags = !{!0, !1, !2}
!0 = !{i32 7, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}
Assigning P-high
as discussed as part of the Prioritization Working Group procedure.
Nominating this for a discussion in T-compiler. While not a "regression" this issue seems just as bad if not worse than some other soundness holes that we've had at P-critical. And it seems like it could be fairly easy to trigger in typical code (and also that there could be an entirely safe reproducer for this issue).
This is caused by a bug in DeadArgumentElimination
:
After inspecting all of the users of a function's return values, MarkValue
is called for each return value: https://github.com/rust-lang/llvm-project/blob/833dd1e3d4fd350c7c9f6fb2ce0c5f16af7a1e21/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp#L610-L611
MarkValue
either calls MarkLive
(which propagates liveness dependencies using the global Uses
map), or updates the Uses
map with the dependencies that were computed earlier: https://github.com/rust-lang/llvm-project/blob/833dd1e3d4fd350c7c9f6fb2ce0c5f16af7a1e21/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp#L653-L666
However, the dependencies computed for higher-numbered return values can affect the MarkLive
calls for lower-numbered return values. That is, we may call PropagateLiveness
for a RetOrArg
before we have fully populated Uses
for that RetOrArg
.
One solution is to iterate over the return values in two passes - we update the Uses
map in the first pass, and call MarkLive
in the second pass.
LLVM bug report https://bugs.llvm.org/show_bug.cgi?id=47444.
This was re-discussed as part of the Prioritization Working Group procedure and we decided to bump the priority to P-critical
.
I'm working on a patch for LLVM
@Aaron1011 can we assign this issue to you then?
My LLVM patch is posted at https://reviews.llvm.org/D87474
The commit can be cherry-picked from https://github.com/Aaron1011/llvm-project/tree/fix/dead-arg-ret-elim
A different LLVM patch (which still fixes this issue) has been accepted: https://reviews.llvm.org/D88529
@Aaron1011 do you know in which LLVM version is this going to be included?
@spastorino it will be included in LLVM 12 which is expected around March/April.
The accepted patch is pretty small and self-contained, so I think we could cherry-pick it if we wanted to
Most helpful comment
I'm working on a patch for LLVM