Rust: Optimisation-caused UB during cross-crate compilation

Created on 5 Sep 2020  路  17Comments  路  Source: rust-lang/rust


The issue reproduces only if FatPtr code resides in a separate crate and only if compiled in release configuration.

I tried this code:

fat_ptr's crate lib.rs:

pub struct FatPtr {
    ptr: *mut u8,
    len: usize,
}

impl FatPtr {
    pub fn new(len: usize) -> FatPtr {
        let ptr = Box::into_raw(vec![42u8; len].into_boxed_slice()) as *mut u8;

        FatPtr { ptr, len }
    }
}

impl std::ops::Deref for FatPtr {
    type Target = [u8];

    #[inline]
    fn deref(&self) -> &[u8] {
        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
    }
}

impl std::ops::Drop for FatPtr {
    fn drop(&mut self) {
        println!("Drop");
    }
}

test's crate main.rs:

use fat_ptr::FatPtr;

fn print(data: &[u8]) {
    println!("{:#?}", data);
}

fn main() {
    let ptr = FatPtr::new(20);
    let data = unsafe { std::slice::from_raw_parts(ptr.as_ptr(), ptr.len()) };

    print(data);
}

I expected to see this happen: 42 printed 20 times.

Instead, this happened: 42 printed 20 times in debug build, but not in release. Instead, slice length appears to be corrupted, so println prints memory dump outside of allocated slice and eventually segfaults. However, if Drop implementation for FatPtr is removed, then slice is truncated to a single byte, which is still incorrect, but doesn't crash the process.

If print(data) call is replaced with println!("{:#?}", data); directly then everything works as intended even on release builds. If FatPtr code placed in the same crate as main then everything works as intended as well. Removing #[inline] from deref fixed the issue as well.

This reproduces on Linux and MacOS (haven't tried Windows).

Meta

rustc --version --verbose:

rustc 1.46.0 (04488afe3 2020-08-24)
binary: rustc
commit-hash: 04488afe34512aa4c33566eb16d8c912a3ae04f9
commit-date: 2020-08-24
host: x86_64-apple-darwin
release: 1.46.0
LLVM version: 10.0


Backtrace

N/A

A-LLVM A-codegen C-bug I-unsound 馃挜 P-critical T-compiler

Most helpful comment

I'm working on a patch for LLVM

All 17 comments

This passes under Miri, and it looks like the invariants for std::slice::from_raw_parts are being upheld.

I can reproduce this at least as far back as nightly-2018-07-07 (although it's an infinite loop there, not a segfault). So I'd guess this isn't a regression and just never worked.

The slice length returned from FatPtr::deref is considered dead and eliminated
by Dead Argument Elimination, that doesn't seem right since there are two
calls to FatPtr::deref, one that uses returned ptr and the other one that uses
returned length.

Minimized LLVM IR

; ModuleID = 'c.ll'
source_filename = "b.7rcbfp3g-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@anon.7406e8c0c880c9bceef10e4e4055b7c3.1 = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"ab\0A\00" }>, align 1

; Function Attrs: inlinehint nonlazybind uwtable
define internal { [0 x i8]*, i64 } @from_raw_parts(i8* %data, i64 %len) unnamed_addr #0 {
start:
  %0 = bitcast i8* %data to [0 x i8]*
  %1 = insertvalue { [0 x i8]*, i64 } undef, [0 x i8]* %0, 0
  %2 = insertvalue { [0 x i8]*, i64 } %1, i64 %len, 1
  ret { [0 x i8]*, i64 } %2
}

; Function Attrs: inlinehint nonlazybind uwtable
define internal i64 @slice_len([0 x i8]* noalias nonnull readonly align 1 %data, i64 %len) unnamed_addr #0 {
start:
  ret i64 %len
}

; Function Attrs: inlinehint nonlazybind uwtable
define internal i8* @as_ptr([0 x i8]* noalias nonnull readonly align 1 %data, i64 %len) unnamed_addr #0 {
start:
  %0 = bitcast [0 x i8]* %data to i8*
  ret i8* %0
}

; Function Attrs: inlinehint nonlazybind uwtable
define internal { [0 x i8]*, i64 } @deref({ i8*, i64 }* noalias readonly align 8 dereferenceable(16) %self) unnamed_addr #0 {
start:
  %0 = getelementptr inbounds { i8*, i64 }, { i8*, i64 }* %self, i64 0, i32 0
  %_2 = load i8*, i8** %0, align 8
  %1 = getelementptr inbounds { i8*, i64 }, { i8*, i64 }* %self, i64 0, i32 1
  %_3 = load i64, i64* %1, align 8
  %2 = call { [0 x i8]*, i64 } @from_raw_parts(i8* %_2, i64 %_3)
  ret { [0 x i8]*, i64 } %2
}

; Function Attrs: noinline norecurse nounwind nonlazybind optnone readnone uwtable
define { i8*, i64 } @crate() unnamed_addr #2 {
  ret { i8*, i64 } { i8* getelementptr inbounds (<{ [4 x i8] }>, <{ [4 x i8] }>* @anon.7406e8c0c880c9bceef10e4e4055b7c3.1, i64 0, i32 0, i64 0), i64 3 }
}

; Function Attrs: nonlazybind
declare dso_local i64 @write(i32, i8* nocapture readonly, i64) local_unnamed_addr #3

; Function Attrs: nonlazybind
define i32 @main(i32 %argc, i8** %argv) unnamed_addr #3 {
start:
  %ptr = alloca { i8*, i64 }, align 8
  %0 = call { i8*, i64 } @crate()
  store { i8*, i64 } %0, { i8*, i64 }* %ptr, align 8
  %1 = call { [0 x i8]*, i64 } @deref({ i8*, i64 }* noalias readonly align 8 dereferenceable(16) %ptr)
  %_1.0 = extractvalue { [0 x i8]*, i64 } %1, 0
  %_1.1 = extractvalue { [0 x i8]*, i64 } %1, 1
  %2 = call i8* @as_ptr([0 x i8]* noalias nonnull readonly align 1 %_1.0, i64 %_1.1)
  %3 = call { [0 x i8]*, i64 } @deref({ i8*, i64 }* noalias readonly align 8 dereferenceable(16) %ptr)
  %_3.0 = extractvalue { [0 x i8]*, i64 } %3, 0
  %_3.1 = extractvalue { [0 x i8]*, i64 } %3, 1
  %4 = call i64 @slice_len([0 x i8]* noalias nonnull readonly align 1 %_3.0, i64 %_3.1)
  %5 = call { [0 x i8]*, i64 } @from_raw_parts(i8* %2, i64 %4)
  %_5.0 = extractvalue { [0 x i8]*, i64 } %5, 0
  %_5.1 = extractvalue { [0 x i8]*, i64 } %5, 1
  %6 = bitcast [0 x i8]* %_5.0 to i8*
  %7 = call i64 @write(i32 1, i8* nonnull %6, i64 %_5.1)
  ret i32 0
}

attributes #0 = { inlinehint nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #1 = { nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #2 = { noinline norecurse nounwind nonlazybind optnone readnone uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #3 = { nonlazybind "target-cpu"="x86-64" }

!llvm.module.flags = !{!0, !1, !2}

!0 = !{i32 7, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}

Nominating this for a discussion in T-compiler. While not a "regression" this issue seems just as bad if not worse than some other soundness holes that we鈥檝e had at P-critical. And it seems like it could be fairly easy to trigger in typical code (and also that there could be an entirely safe reproducer for this issue).

This is caused by a bug in DeadArgumentElimination:

After inspecting all of the users of a function's return values, MarkValue is called for each return value: https://github.com/rust-lang/llvm-project/blob/833dd1e3d4fd350c7c9f6fb2ce0c5f16af7a1e21/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp#L610-L611

MarkValue either calls MarkLive (which propagates liveness dependencies using the global Uses map), or updates the Uses map with the dependencies that were computed earlier: https://github.com/rust-lang/llvm-project/blob/833dd1e3d4fd350c7c9f6fb2ce0c5f16af7a1e21/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp#L653-L666

However, the dependencies computed for higher-numbered return values can affect the MarkLive calls for lower-numbered dependencies. That is, we may call PropagateLiveness for a RetOrArg before we have fully populated Uses for that RetOrArg.

One solution is to iterate over the return values in two passes - we update the Uses map in the first pass, and call MarkLive in the second pass.

This was re-discussed as part of the Prioritization Working Group procedure and we decided to bump the priority to P-critical.

I'm working on a patch for LLVM

@Aaron1011 can we assign this issue to you then?

My LLVM patch is posted at https://reviews.llvm.org/D87474
The commit can be cherry-picked from https://github.com/Aaron1011/llvm-project/tree/fix/dead-arg-ret-elim

A different LLVM patch (which still fixes this issue) has been accepted: https://reviews.llvm.org/D88529

@Aaron1011 do you know in which LLVM version is this going to be included?

@spastorino it will be included in LLVM 12 which is expected around March/April.

The accepted patch is pretty small and self-contained, so I think we could cherry-pick it if we wanted to

Was this page helpful?
0 / 5 - 0 ratings

Related issues

nikomatsakis picture nikomatsakis  路  331Comments

GuillaumeGomez picture GuillaumeGomez  路  300Comments

nikomatsakis picture nikomatsakis  路  412Comments

nikomatsakis picture nikomatsakis  路  236Comments

nikomatsakis picture nikomatsakis  路  259Comments