Rust: Optimize for the range of resulting values from the euclidean remainder of a signed integer

Created on 3 Dec 2019  Â·  3Comments  Â·  Source: rust-lang/rust

Is it possible to give the compiler knowledge that the euclidean remainder of a signed integer is always positive? Specifically, 0 ≤ rem ≤ x for value.rem_euclid(x). This already appears to be the case for both regular modulo and the euclidean remainder of signed integers. Right now, it's necessary to add a branch that isn't optimized away.

With an unsigned integer:

pub fn rem(value: u8) -> u8 {
    match value.rem_euclid(7) {
        remainder @ 0..=6 => remainder,
        _ => unreachable!(),
    }
}

godbolt

results in the following assembly

example::rem:
        movzx   eax, dil
        lea     ecx, [rax + 8*rax]
        lea     ecx, [rax + 4*rcx]
        shr     ecx, 8
        mov     edx, eax
        sub     dl, cl
        shr     dl
        add     dl, cl
        shr     dl, 2
        movzx   ecx, dl
        lea     edx, [8*rcx]
        sub     edx, ecx
        sub     al, dl
        ret

With a signed integer:

pub fn rem(value: i8) -> i8 {
    match value.rem_euclid(7) {
        remainder @ 0..=6 => remainder,
        _ => unreachable!(),
    }
}

godbolt

results in the following assembly

<T as core::any::Any>::type_id:
        movabs  rax, 1229646359891580772
        ret

std::panicking::begin_panic:
        sub     rsp, 24
        lea     rax, [rip + .L__unnamed_3]
        mov     qword ptr [rsp + 8], rax
        mov     qword ptr [rsp + 16], 40
        lea     rsi, [rip + .L__unnamed_1]
        lea     rcx, [rip + .L__unnamed_4]
        lea     rdi, [rsp + 8]
        xor     edx, edx
        call    qword ptr [rip + std::panicking::rust_panic_with_hook@GOTPCREL]
        ud2

core::ptr::real_drop_in_place:
        ret

<std::panicking::begin_panic::PanicPayload<A> as core::panic::BoxMeUp>::get:
        push    rax
        cmp     qword ptr [rdi], 0
        je      .LBB3_1
        mov     rax, rdi
        lea     rdx, [rip + .L__unnamed_2]
        pop     rcx
        ret
.LBB3_1:
        call    qword ptr [rip + std::process::abort@GOTPCREL]
        ud2

<std::panicking::begin_panic::PanicPayload<A> as core::panic::BoxMeUp>::take_box:
        push    r14
        push    rbx
        push    rax
        mov     rbx, qword ptr [rdi]
        mov     r14, qword ptr [rdi + 8]
        mov     qword ptr [rdi], 0
        test    rbx, rbx
        je      .LBB4_3
        mov     edi, 16
        mov     esi, 8
        call    qword ptr [rip + __rust_alloc@GOTPCREL]
        test    rax, rax
        je      .LBB4_4
        mov     qword ptr [rax], rbx
        mov     qword ptr [rax + 8], r14
        lea     rdx, [rip + .L__unnamed_2]
        add     rsp, 8
        pop     rbx
        pop     r14
        ret
.LBB4_3:
        call    qword ptr [rip + std::process::abort@GOTPCREL]
        ud2
.LBB4_4:
        mov     edi, 16
        mov     esi, 8
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
        ud2

example::rem:
        push    rax
        movsx   eax, dil
        imul    eax, eax, -109
        shr     eax, 8
        add     al, dil
        mov     ecx, eax
        shr     cl, 7
        sar     al, 2
        add     al, cl
        movzx   eax, al
        lea     ecx, [8*rax]
        sub     ecx, eax
        sub     dil, cl
        lea     eax, [rdi + 7]
        test    dil, dil
        movzx   ecx, dil
        movzx   eax, al
        cmovns  eax, ecx
        cmp     al, 6
        ja      .LBB5_2
        pop     rcx
        ret
.LBB5_2:
        call    std::panicking::begin_panic
        ud2

.L__unnamed_1:
        .quad   core::ptr::real_drop_in_place
        .quad   16
        .quad   8
        .quad   <std::panicking::begin_panic::PanicPayload<A> as core::panic::BoxMeUp>::take_box
        .quad   <std::panicking::begin_panic::PanicPayload<A> as core::panic::BoxMeUp>::get

.L__unnamed_2:
        .quad   core::ptr::real_drop_in_place
        .quad   16
        .quad   8
        .quad   <T as core::any::Any>::type_id

.L__unnamed_5:
        .ascii  "./example.rs"

.L__unnamed_4:
        .quad   .L__unnamed_5
        .asciz  "\f\000\000\000\000\000\000\000\004\000\000\000\016\000\000"

.L__unnamed_3:
        .ascii  "internal error: entered unreachable code"
C-enhancement I-slow T-compiler

Most helpful comment

@the8472 I'm aware that I can use unsafe. My request is that I shouldn't have to, as the compiler should be able to optimize it away the same way it does with signed integers. The crate I'm rewriting has #![forbid(unsafe_code)] for a reason 🙂

All 3 comments

try

pub unsafe fn rem(value: i8) -> i8 {
    match value.rem_euclid(7)
    {
        remainder @ 0..=6 => remainder,
        _ => std::hint::unreachable_unchecked(),
    }
}

@the8472 I'm aware that I can use unsafe. My request is that I shouldn't have to, as the compiler should be able to optimize it away the same way it does with signed integers. The crate I'm rewriting has #![forbid(unsafe_code)] for a reason 🙂

I'd be interested in trying to fix this if anyone could point me in the right direction. I have looked at some compiler code before, but never really hacked any of it.

Was this page helpful?
0 / 5 - 0 ratings