Hi Andrew and anyone else,
With this issue I'd like to post some code segments with their { source, IR0 and IR } dumps. If you could comment on what is happening with the result location, that would be greatly appreciated.
Note: I will use "diff" syntax to highlight the IR lines that deserve attention.
// Minimal example: a local variable of scalar type `u32` initialized from the
// integer literal 33. The IR dumps that follow are for this function.
export fn entry_a() void {
    var x: u32 = 33;
}
fn entry_a() { // (IR)
Entry_0:
! #1 | ResetResult | (unknown) | - | ResetResult(none)
! #2 | ResetResult | (unknown) | - | ResetResult(none)
! #3 | ResetResult | (unknown) | - | ResetResult(none)
#4 | Const | type | 2 | u32
! #5 | EndExpr | (unknown) | - | EndExpr(result=none,value=u32)
#6 | Const | bool | 2 | false
#7 | AllocaSrc | (unknown) | 1 | Alloca(align=(null),name=x)
! #8 | ResetResult | (unknown) | - | ResetResult(var(#7))
! #9 | ResetResult | (unknown) | - | ResetResult(cast(ty=u32))
#10 | Const | comptime_int| 2 | 33
! #11 | EndExpr | (unknown) | - | EndExpr(result=cast(ty=u32),value=33)
#12 | ImplicitCast | (unknown) | 1 | @implicitCast(33)result=cast(ty=u32)
! #13 | EndExpr | (unknown) | - | EndExpr(result=var(#7),value=#12)
#14 | DeclVarSrc | void | - | var x = #7 // comptime = false
#15 | Const | void | 0 | {}
#16 | Const | void | 3 | {}
! #17 | EndExpr | (unknown) | - | EndExpr(result=none,value={})
#18 | AddImplicitReturnType | (unknown) | - | @addImplicitReturnType({})
#19 | Return | noreturn | - | return {}
}
fn entry_a() { // (analyzed)
Entry_0:
#16 | StorePtr | void | - | *#12 = 33
:12 | AllocaGen | *u32 | 2 | Alloca(align=0,name=x)
#17 | DeclVarGen | void | - | var x: u32 align(4) = #12 // comptime = false
#21 | Return | noreturn | - | return {}
}
The first thing I want to point out in this example, is that with a scalar type such as an integer, the result location semantics is pure overhead. Types in Zig have this property handle_is_ptr, which tells whether or not Zig would prefer to avoid copies of the type. For integers, this value is false, meaning that Zig will make copies of integers rather than using result location semantics to avoid copying.
So, in this example, what is the result location? Trace the expression from outer to inner, left to right. First, we have a variable x. This is the result location. Next we have type u32. To Zig, these are generated identically:
var x = @as(u32, 33);
var x: u32 = 33;
In either case, we set up a ResultLocationVar for x, and pass that down to the next expression generator. This creates ResultLocationCast, with destination type u32 and parent result location the ResultLocationVar.
Finally, when we evaluate the expression 33, we have the ability to obtain the result location for the current expression, which will give us a *u32. In order to obtain this pointer, one must provide the type, and value if applicable, that one intends to store into the result location.
With this example, integer literals do not need to access the result location; instead they produce a value, and they rely on the EndExpr instruction to write the value to the result location.
This IR instruction is emitted at the end of every expression. Every expression has a result location, and this instruction detects if the result location was written to by the IR evaluation of the expression. If not, then the EndExpr instruction does it.
In the original example in this issue, this code is what ends up calling ir_analyze_store_ptr which generates the store instruction to the variable x.
However in this example, was_written will be true and so EndExpr will do nothing:
// Variant where `x` is initialized from a function call. Per the surrounding
// discussion, the call's analysis writes the result location itself, so
// EndExpr has nothing left to store.
export fn entry_a() void {
var x = foo();
}
// Returns an anonymous struct with two i32 fields, a = 1 and b = 2.
fn foo() struct { a: i32, b: i32} {
return .{ .a = 1, .b = 2 };
}
This is because the IR analysis for the function call deals explicitly with the result location, setting written = true.
// Analysis of the EndExpr instruction that closes an expression.
// Resolves the expression's result location and, if that location was not
// already written (or is a peer result location), stores the expression's
// value into it.
// Returns the codegen's invalid instruction when the value, the resolved
// location, or the generated store has an invalid type; returns the resolved
// location itself when its type is unreachable; otherwise returns a void
// constant.
static IrInstruction *ir_analyze_instruction_end_expr(IrAnalyze *ira, IrInstructionEndExpr *instruction) {
IrInstruction *value = instruction->value->child;
if (type_is_invalid(value->value->type))
return ira->codegen->invalid_instruction;
// Snapshot the flag before the result location is resolved below.
// NOTE(review): presumably ir_resolve_result can itself set `written` -- confirm.
bool was_written = instruction->result_loc->written;
IrInstruction *result_loc = ir_resolve_result(ira, &instruction->base, instruction->result_loc,
value->value->type, value, false, false, true);
// A null result skips all store/mutability handling and yields the void constant.
if (result_loc != nullptr) {
if (type_is_invalid(result_loc->value->type))
return ira->codegen->invalid_instruction;
if (result_loc->value->type->id == ZigTypeIdUnreachable)
return result_loc;
// Store only when nothing wrote the location earlier; peer result locations
// are stored to regardless of the flag.
if (!was_written || instruction->result_loc->id == ResultLocIdPeer) {
IrInstruction *store_ptr = ir_analyze_store_ptr(ira, &instruction->base, result_loc, value,
instruction->result_loc->allow_write_through_const);
if (type_is_invalid(store_ptr->value->type)) {
return ira->codegen->invalid_instruction;
}
}
// For non-peer locations whose pointer mutability is still ConstPtrMutInfer,
// settle it now: a comptime-known value makes it ConstPtrMutComptimeConst,
// otherwise the location's value is marked ConstValSpecialRuntime.
if (result_loc->value->data.x_ptr.mut == ConstPtrMutInfer &&
instruction->result_loc->id != ResultLocIdPeer)
{
if (instr_is_comptime(value)) {
result_loc->value->data.x_ptr.mut = ConstPtrMutComptimeConst;
} else {
result_loc->value->special = ConstValSpecialRuntime;
}
}
}
return ir_const_void(ira, &instruction->base);
}
You can see here there is some hacky special-case logic dealing with "peer" result locations and comptime constants.
What the ResetResult instruction actually does is this:
// Puts a result location back into its initial state. Clears the bookkeeping
// fields common to every ResultLoc, then whatever extra state the specific
// kind of result location carries; a peer parent also resets each of its peers.
static void ir_reset_result(ResultLoc *result_loc) {
    // Bookkeeping shared by all kinds of result location.
    result_loc->implicit_elem_type = nullptr;
    result_loc->gen_instruction = nullptr;
    result_loc->resolved_loc = nullptr;
    result_loc->written = false;

    // Kind-specific state.
    switch (result_loc->id) {
        case ResultLocIdInvalid:
            zig_unreachable();
        case ResultLocIdPeerParent: {
            auto *parent = reinterpret_cast<ResultLocPeerParent *>(result_loc);
            parent->resolved_type = nullptr;
            parent->done_resuming = false;
            parent->skipped = false;
            // Every peer is a result location of its own and is reset recursively.
            size_t peer_index = 0;
            while (peer_index < parent->peers.length) {
                ir_reset_result(&parent->peers.at(peer_index)->base);
                peer_index += 1;
            }
            break;
        }
        case ResultLocIdReturn: {
            auto *return_loc = reinterpret_cast<ResultLocReturn *>(result_loc);
            return_loc->implicit_return_type_done = false;
            break;
        }
        case ResultLocIdVar: {
            // Clear the `child` pointer on the variable's source alloca instruction.
            auto *var_alloca =
                reinterpret_cast<IrInstructionAllocaSrc *>(result_loc->source_instruction);
            var_alloca->base.child = nullptr;
            break;
        }
        case ResultLocIdNone:
        case ResultLocIdPeer:
        case ResultLocIdInstruction:
        case ResultLocIdBitCast:
        case ResultLocIdCast:
            // Nothing beyond the common fields to clear.
            break;
    }
}
That is, it resets the state of the structs to their "zero" values:
// State tracked for a single result location.
// Additions to this struct may need to be handled in
// ir_reset_result
struct ResultLoc {
ResultLocId id; // kind of result location; ir_reset_result switches on it
bool written; // cleared by ir_reset_result; read by EndExpr analysis when deciding whether to store
bool allow_write_through_const; // forwarded to ir_analyze_store_ptr by EndExpr analysis
IrInstruction *resolved_loc; // result ptr
IrInstruction *source_instruction; // for ResultLocIdVar, ir_reset_result treats this as an IrInstructionAllocaSrc
IrInstruction *gen_instruction; // value to store to the result loc
ZigType *implicit_elem_type; // cleared to nullptr by ir_reset_result
};
In the simple example:
// The simple straight-line case: per the surrounding text, resetting the
// result location is not actually needed here.
export fn entry_a() void {
var x: u32 = 33;
}
resetting the result location actually is not necessary. But in this example it is necessary:
// The case where the reset matters: per the surrounding text, the result
// locations inside the loop body are the same objects on every run of the
// loop, so their state has to be reset between iterations.
export fn entry_a() void {
inline for ([_]i32{ 1, 2 }) |item| {
var x: u32 = item;
}
}
Because the result locations inside the loop refer to the same objects in memory on different runs of the loop, we need to "reset the state" between loop iterations.
Most helpful comment
The first thing I want to point out in this example, is that with a scalar type such as an integer, the result location semantics is pure overhead. Types in Zig have this property handle_is_ptr, which tells whether or not Zig would prefer to avoid copies of the type. For integers, this value is false, meaning that Zig will make copies of integers rather than using result location semantics to avoid copying.
So, in this example, what is the result location? Trace the expression from outer to inner, left to right. First, we have a variable x. This is the result location. Next we have type u32. To Zig, these are generated identically:
In either case, we set up a ResultLocationVar for x, and pass that down to the next expression generator. This creates ResultLocationCast, with destination type u32 and parent result location the ResultLocationVar.
Finally, when we evaluate the expression 33, we have the ability to obtain the result location for the current expression, which will give us a *u32. In order to obtain this pointer, one must provide the type, and value if applicable, that one intends to store into the result location.
With this example, integer literals do not need to access the result location; instead they produce a value, and they rely on the EndExpr instruction to write the value to the result location.
EndExpr
This IR instruction is emitted at the end of every expression. Every expression has a result location, and this instruction detects if the result location was written to by the IR evaluation of the expression. If not, then the EndExpr instruction does it.
In the original example in this issue, this code is what ends up calling ir_analyze_store_ptr which generates the store instruction to the variable x.
However in this example, was_written will be true and so EndExpr will do nothing:
This is because the IR analysis for the function call deals explicitly with the result location, setting written = true.
You can see here there is some hacky special-case logic dealing with "peer" result locations and comptime constants.
ResetResult
What the ResetResult instruction actually does is this:
That is, it resets the state of the structs to their "zero" values:
In the simple example:
resetting the result location actually is not necessary. But in this example it is necessary:
Because the result locations inside the loop refer to the same objects in memory on different runs of the loop, we need to "reset the state" between loop iterations.
Other Result Location Topics