I am trying to build a shared WebAssembly library (in the sense of https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md) from Rust.
The following looks promising:
#![no_main]
#![feature(link_args)]
#![allow(unused_attributes)]
#![link_args = "--import-memory"]
#![link_args = "--shared"]
// no_std just to make the output smaller and easier to understand
#![no_std]
#![feature(lang_items)]
#[lang = "eh_personality"] extern fn eh_personality() {}
use core::panic::PanicInfo;
#[panic_handler]
fn panic(_info: &PanicInfo) -> ! { loop {} }
#[link(wasm_import_module = "import")]
extern { pub fn foo(ptr : *const u8); }
#[export_name = "start"]
pub unsafe fn start() {
foo("Hi".as_ptr());
}
If I compile this (using --target wasm32-unknown-unknown), I get the following code:
(module
(type $0 (func (param i32)))
(type $1 (func))
(type $2 (func (param i32 i32) (result i32)))
(import "env" "memory" (memory $0 0))
(import "env" "__indirect_function_table" (table 0 anyfunc))
(import "env" "__stack_pointer" (global $gimport$2 i32))
(import "env" "__memory_base" (global $gimport$3 i32))
(import "env" "__table_base" (global $gimport$4 i32))
(import "import" "foo" (func $foo (param i32)))
(global $global$0 i32 (i32.const 0))
(data (get_global $gimport$3) "\01gdb_load_rust_pretty_printers.py\00Hi")
(export "__rustc_debug_gdb_scripts_section__" (global $global$0))
(export "start" (func $start))
(func $__wasm_call_ctors (; 1 ;) (type $1)
)
(func $_ZN4core3str21_$LT$impl$u20$str$GT$6as_ptr17hd1ab9e60453863b8E (; 2 ;) (type $2) (param $var$0 i32) (param $var$1 i32) (result i32)
(local $var$2 i32)
(local $var$3 i32)
(local $var$4 i32)
(local $var$5 i32)
(set_local $var$2
(get_global $gimport$2)
)
(set_local $var$3
(i32.const 16)
)
(set_local $var$4
(i32.sub
(get_local $var$2)
(get_local $var$3)
)
)
(i32.store offset=8
(get_local $var$4)
(get_local $var$0)
)
(i32.store offset=12
(get_local $var$4)
(get_local $var$1)
)
(set_local $var$5
(i32.load offset=8
(get_local $var$4)
)
)
(return
(get_local $var$5)
)
)
(func $start (; 3 ;) (type $1)
(local $var$0 i32)
(local $var$1 i32)
(local $var$2 i32)
(local $var$3 i32)
(set_local $var$0
(i32.const 34)
)
(set_local $var$1
(get_local $var$0)
)
(set_local $var$2
(i32.const 2)
)
(set_local $var$3
(call $_ZN4core3str21_$LT$impl$u20$str$GT$6as_ptr17hd1ab9e60453863b8E
(get_local $var$1)
(get_local $var$2)
)
)
(call $foo
(get_local $var$3)
)
(return)
)
;; custom section "dylink", size 5
;; custom section ".debug_info", size 1748
;; custom section ".debug_macinfo", size 2
;; custom section ".debug_pubtypes", size 959
;; custom section ".debug_ranges", size 32
;; custom section ".debug_abbrev", size 401
;; custom section ".debug_line", size 280
;; custom section ".debug_str", size 1569
;; custom section ".debug_pubnames", size 235
;; custom section "producers", size 79
)
It is very promising that this module imports __stack_pointer and __memory_base, and that it puts the static data ("\01gdb_load_rust_pretty_printers.py\00Hi") at a position that is determined by the imported __memory_base.
But when the code actually references the Hi string, in function start, it simply uses an absolute pointer, without shifting it by __memory_base:
(set_local $var$0
(i32.const 34)
)
so this does not seem to be working yet.
Is there a flag I am using wrong? Or is this just not yet supported?
$ rustc --version
rustc 1.35.0-nightly (3750348da 2019-04-08)
Why does this need to be done? Locals should always refer to the function's stack frame implicitly.
Why does this need to be done? Locals should always refer to the function's stack frame implicitly.
I am not referring to locals; the problem is the (i32.const 34): This seems to be the pointer to "Hi", as an absolute pointer. I would expect this to be (i32.add (get_global $gimport$3) (i32.const 34)), so that the address is taken relative to the __memory_base.
JFTR, same happens with crate-type = ["cdylib"]
Ah I see, that makes more sense
To reproduce without cargo, I am using this command line:
rustc foo.rs --crate-type cdylib -o foo.wasm --target wasm32-unknown-unknown
With -C incremental=foo I can get hold of the .o files passed to lld, and these files are WebAssembly modules with a static linking section, including a code relocation section according to https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md, but I can see that lld isn’t really in a position to turn this into a dynamically linkable library.
Relevant bits:
$ wasm-dis foo/lib-1oxx6vmtp942d/s-fbld28zq8j-122tlgg-2xj38ws0mwqgo/x73358tcv87tyh4.o
(module
(type $0 (func))
(type $1 (func (param i32)))
(type $2 (func (param i32 i32) (result i32)))
(import "env" "__linear_memory" (memory $0 1))
(import "env" "__indirect_function_table" (table 0 anyfunc))
(import "env" "_ZN4core3str21_$LT$impl$u20$str$GT$6as_ptr17h490791c777cb2b36E" (func $fimport$2 (param i32 i32) (result i32)))
(import "import" "foo" (func $fimport$3 (param i32)))
(data (i32.const 0) "Hi")
…
(func $2 (; 4 ;) (type $0)
…
(set_local $var$0
(i32.const 0)
)
…
)
;; custom section "linking", size 117
;; custom section "reloc.CODE", size 12
)
rustc is using LLVM to generate and link WebAssembly code, right? Just to narrow down what’s happening where, here is the equivalent in C:
/tmp $ cat foo.c
extern int bar (int, char *);
char *s = "Foo";
int foo (int n) { return bar(n, s); }
/tmp $ clang-9 --compile -fpic foo.c --target=wasm32-unknown-unknown-wasm --optimize=3 --output foo.o
/tmp $ wasm-ld-9 --shared --no-entry foo.o -o foo.wasm --export=foo --gc-sections
/tmp $ wasm-dis foo.wasm
(module
(type $0 (func (param i32 i32) (result i32)))
(type $1 (func))
(type $2 (func (param i32) (result i32)))
(import "env" "memory" (memory $0 0))
(import "env" "__indirect_function_table" (table 0 anyfunc))
(import "env" "__memory_base" (global $gimport$2 i32))
(import "env" "__table_base" (global $gimport$3 i32))
(import "env" "bar" (func $bar (param i32 i32) (result i32)))
(data (get_global $gimport$2) "Foo\00\00\00\00\00")
(export "foo" (func $foo))
(func $__wasm_call_ctors (; 1 ;) (type $1)
(call $__wasm_apply_relocs)
)
(func $__wasm_apply_relocs (; 2 ;) (type $1)
(i32.store
(i32.add
(get_global $gimport$2)
(i32.const 4)
)
(i32.add
(get_global $gimport$2)
(i32.const 0)
)
)
)
(func $foo (; 3 ;) (type $2) (param $var$0 i32) (result i32)
(call $bar
(get_local $var$0)
(i32.load
(i32.add
(get_global $gimport$2)
(i32.const 4)
)
)
)
)
;; custom section "dylink", size 5
;; custom section "producers", size 52
)
Note how $foo calculates the address to the static memory based on $gimport$2.
Since I passed -fpic to clang here, I tried passing -C relocation-model=pic to rustc, but the output is unchanged.
Possibly related:
rustc simply doesn’t support producing the PIC code needed for shared libraries. FWIW, back in https://github.com/rust-lang/rust/issues/46645#issuecomment-370906261 (March 2018) I had success with making a relocatable WASM module. Of course it requires a preprocessing bundler / the WASM loader to actually relocate the modules, so it's not as trivial as a PIC module.
For my use case, I decided that PIC (e.g. importing a global that indicates memory offset) is more suited than static relocation sections.
@alexcrichton It does look like #59712 is related.
At least passing -C relocation-model=pic in addition to -C link-arg=--shared doesn't seem to have any effect, so addresses stay incorrect (static).
Do you think there's a way to override relocation mode back to PIC for consumer experimentation with shared Wasm libraries?
AFAIK no work really has been done to get Rust/wasm working with the dynamic linking proposal. It's highly unlikely that no changes to rustc will be necessary to get things working. I don't personally have time to work on it right now.
My guess is that either the upstream proposal will need to change to support a more ELF-like way of compiling shared objects and linking them into a static-like version, a new target will need to be added, or the standard library will need to be recompiled from scratch with xargo/cargo-xbuild and new codegen flags.
Any progress on this?
I tried this with rustc 1.46.0-nightly (0c03aee8b 2020-07-05) just to see if anything changed in the meantime. Currently -Clink-arg=--shared causes this error:
error: linking with `rust-lld` failed: exit code: 1
|
= note: "rust-lld" "-flavor" "wasm" "--no-threads" "-z" "stack-size=1048576" "--stack-first" "--allow-undefined" "--fatal-warnings" "--no-demangle" "--export-dynamic" "--no-entry" "-L" "/home/omer/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/wasm32-unknown-unknown/lib" "-L" "/home/omer/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/wasm32-unknown-unknown/lib/self-contained" "foo.main.7rcbfp3g-cgu.0.rcgu.o" "foo.main.7rcbfp3g-cgu.1.rcgu.o" "-o" "foo.wasm" "--export" "start" "--export=__heap_base" "--export=__data_end" "--gc-sections" "-O0" "-L"
"/home/omer/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/wasm32-unknown-unknown/lib" "/home/omer/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/wasm32-unknown-unknown/lib/librustc_std_workspace_core-338ea5f3d59665eb.rlib" "/home/omer/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/wasm32-unknown-unknown/lib/libcore-c883bdbeb473297b.rlib" "/home/omer/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/wasm32-unknown-unknown/lib/libcompiler_builtins-4a8deb6b3def81fd.rlib" "--shared" "--import-memory"
= note: rust-lld: error: foo.main.7rcbfp3g-cgu.1.rcgu.o: relocation R_WASM_MEMORY_ADDR_SLEB cannot be used against symbol .L__unnamed_1; recompile with -fPIC
error: aborting due to previous error
If I remove --shared but keep --import-memory I get the same absolute pointer in start. -C relocation-model=... parameters don't make any difference.
Interestingly if I try the C example above:
extern int bar(int, char *);
char *s = "Foo";
int foo(int n)
{
return bar(n, s);
}
The error is very similar to the one I get from rustc when I try with -Clink-arg=--shared:
$ clang-10 -c -fPIC test.c --target=wasm32-unknown-unknown-wasm -o test.o -O3
$ wasm-ld --shared --no-entry test.o -o test.wasm --export=foo --gc-sections
wasm-ld: error: test.o: relocation R_WASM_MEMORY_ADDR_LEB cannot be used against symbol s; recompile with -fPIC
This says R_WASM_MEMORY_ADDR_LEB instead of R_WASM_MEMORY_ADDR_SLEB but I suspect the root cause is the same.
The lld documentation (https://lld.llvm.org/WebAssembly.html) says
No support for creating shared libraries. The spec for shared libraries in WebAssembly is still in flux: https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
So I'm guessing this is something to fix/implement on lld side first before making any changes in rustc?
Regarding the R_WASM_MEMORY_ADDR_SLEB error I mention above, I was reading about this and came across this blog post, which says that the error only happens when using static strings. I confirmed that that's also the case with Rust. For example, in the original reproducer, if I change main:
#![no_main]
#![no_std]
#![feature(link_args, lang_items, core_intrinsics)]
#![allow(unused_attributes)]
#![link_args = "--import-memory"]
#![link_args = "--shared"]
#[panic_handler]
#[no_mangle]
pub fn panic(_info: &::core::panic::PanicInfo) -> ! {
::core::intrinsics::abort();
}
#[lang = "eh_personality"]
extern fn rust_eh_personality() {}
#[link(wasm_import_module = "import")]
extern { pub fn foo(ptr : *const u8); }
#[export_name = "main"]
pub unsafe fn main() {
let x = 5u8;
foo(&x as *const u8);
let hi: [u8; 2] = [b'h', b'i'];
foo(hi.as_ptr());
}
I can build this just fine with
$ rustc lib.rs --target=wasm32-unknown-unknown -O
The wat:
(module
(type (;0;) (func (param i32)))
(type (;1;) (func))
(import "env" "memory" (memory (;0;) 0))
(import "env" "__indirect_function_table" (table (;0;) 0 funcref))
(import "env" "__stack_pointer" (global (;0;) (mut i32)))
(import "env" "__memory_base" (global (;1;) i32))
(import "env" "__table_base" (global (;2;) i32))
(import "import" "foo" (func $_ZN3lib3foo17hb22c31d0714767f1E (type 0)))
(func $__wasm_call_ctors (type 1)
call $__wasm_apply_relocs)
(func $__wasm_apply_relocs (type 1))
(func $main (type 1)
(local i32)
global.get 0
i32.const 16
i32.sub
local.tee 0
global.set 0
local.get 0
i32.const 5
i32.store8 offset=13
local.get 0
i32.const 13
i32.add
call $_ZN3lib3foo17hb22c31d0714767f1E
local.get 0
i32.const 26984
i32.store16 offset=14 align=1
local.get 0
i32.const 14
i32.add
call $_ZN3lib3foo17hb22c31d0714767f1E
local.get 0
i32.const 16
i32.add
global.set 0)
(export "main" (func $main)))
However if I make the array static
static HI: [u8; 2] = [b'h', b'i'];
#[export_name = "main"]
pub unsafe fn main() {
let x = 5u8;
foo(&x as *const u8);
foo(HI.as_ptr());
}
This fails with the same error.
So it seems like there's something to be fixed in handling of static data. The blog post linked above suggests that this is a clang/LLVM bug as they reproduce the problem in a C file.
Certainly not a clang/LLVM bug. The blog post refers to LLVM-8 (where indeed it didn't work), but since LLVM-9 I can reliably produce shared WebAssembly libraries from C, including static data.
And it’s not surprising that this is related to static data: If you don’t have static data, there is nothing to relocate upon linking :-)
I'm not sure. Here's an example in C:
extern void f(char*);
static char *s = "blah";
void test(void) {
f(s);
}
Using clang-10:
$ clang --version
clang version 10.0.0
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/omer/Downloads/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/bin
$ clang --target=wasm32-unknown-unknown -nostdlib -c -fPIC -o lib.o lib.c
$ wasm-ld --version
LLD 10.0.0
$ wasm-ld --shared --import-memory --export-all --allow-undefined lib.o -o lib.wasm
wasm-ld: error: lib.o: relocation R_WASM_MEMORY_ADDR_LEB cannot be used against symbol s; recompile with -fPIC
It's not the exact same error though -- it complains about a R_WASM_MEMORY_ADDR_LEB relocation instead of R_WASM_MEMORY_ADDR_SLEB.
I just realized that if I use --target=wasm32-unknown-emscripten above (instead of wasm32-unknown-unknown) it builds:
$ clang --target=wasm32-unknown-emscripten -nostdlib -c -fPIC -o lib.o lib.c
$ wasm-ld --shared --import-memory --export-all --allow-undefined lib.o -o lib.wasm
When building Rust, simply using the target wasm32-unknown-emscripten is not enough; we also need --crate-type=lib and -Crelocation-model=pic. Full example:
$ cat lib.rs
#![no_std]
#![feature(lang_items, core_intrinsics)]
#[panic_handler]
#[no_mangle]
pub fn panic(_info: &core::panic::PanicInfo) -> ! {
core::intrinsics::abort();
}
#[lang = "eh_personality"]
extern "C" fn rust_eh_personality() {}
extern "C" {
pub fn foo(ptr: *const u8);
}
#[export_name = "_start"]
pub unsafe fn main() {
foo("hi".as_ptr());
}
$ rustc lib.rs --target=wasm32-unknown-emscripten -O --crate-type=lib -Crelocation-model=pic
This generates a liblib.rlib file which we need to unpack to get the .o file:
$ ar -x liblib.rlib
Now if we look at the code in the .o file:
$ wasm-objdump -d lib.lib.3a1fbbbh-cgu.0.rcgu.o
lib.lib.3a1fbbbh-cgu.0.rcgu.o: file format wasm 0x1
Code Disassembly:
00008e func[1] <rust_begin_unwind>:
00008f: 00 | unreachable
000090: 00 | unreachable
000091: 0b | end
000093 func[2] <rust_eh_personality>:
000094: 0b | end
000096 func[3] <_start>:
000097: 23 80 80 80 80 00 | global.get 0 <env.__memory_base>
00009d: 41 80 80 80 80 00 | i32.const 0
0000a3: 6a | i32.add
0000a4: 10 80 80 80 80 00 | call 0 <env.foo>
0000aa: 0b | end
Here the address of the string (static data) is relative to env.__memory_base, which is what we were trying to generate.
Great, that’s all I wanted :-)
Most helpful comment
I just realized that if I use --target=wasm32-unknown-emscripten above (instead of wasm32-unknown-unknown) it builds: