Please answer these questions before submitting your issue. Thanks!
go version)?go1.10devel
go env)?amd64
Compiled various simple loops that include "if flag { call(); }" (and that is the only call in the loop)
All spills and reloads grouped around the call and not in the common code path of the loop.
Half-right -- the reload occurs under the conditional, but it is not properly hoisted out of the loop so there is also a reload in the common code path
Sample code and compiled assembly follow:
package foo
var flag bool = true
//go:noinline
func k() {
flag = false
}
func f1(p []byte) bool {
i := 0
l := len(p)
if i < l {
for {
if p[i] != 0 {
return true
}
i++
if i >= l {
break
}
if flag {
k()
}
}
}
return false
}
func f2(p []byte) bool {
i := 0
l := len(p)
if i < l {
for {
if p[i] != 0 {
return true
}
i++
if flag {
k()
}
if i >= l {
break
}
}
}
return false
}
func f3(p []byte) bool {
for _, x := range p {
if x != 0 {
return true
}
if flag {
k()
}
}
return false
}
// This one has no call and the loads are done right.
func f0(p []byte) bool {
i := 0
l := len(p)
if i < l {
for {
if p[i] != 0 {
return true
}
i++
if i >= l {
break
}
}
}
return false
}
genssa f1
00000 (.../spillsink.go:10) TEXT "".f1(SB)
00001 (.../spillsink.go:10) FUNCDATA $0, gclocals·...(SB)
00002 (.../spillsink.go:10) FUNCDATA $1, gclocals·...(SB)
v8 00003 (.../spillsink.go:10) MOVQ "".p+8(SP), AX
v30 00004 (.../spillsink.go:13) TESTQ AX, AX
b1 00005 (.../spillsink.go:13) JLE 26
v43 00006 (.../spillsink.go:13) MOVL $0, CX
v33 00007 (.../spillsink.go:15) CMPQ CX, AX
b5 00008 (.../spillsink.go:15) JCC 30
v22 00009 (.../spillsink.go:15) MOVQ "".p(SP), DX <-- this load should occur before the loop
v20 00010 (.../spillsink.go:15) MOVBLZX (DX)(CX*1), BX
v9 00011 (.../spillsink.go:15) TESTB BX, BX
b10 00012 (.../spillsink.go:15) JNE 28
v29 00013 (.../spillsink.go:18) INCQ CX
v6 00014 (.../spillsink.go:19) CMPQ CX, AX
b9 00015 (.../spillsink.go:19) JGE 26
v34 00016 (.../spillsink.go:22) MOVBLZX "".flag(SB), BX
v19 00017 (.../spillsink.go:22) TESTB BX, BX
b13 00018 (.../spillsink.go:22) JEQ 7
v18 00019 (.../spillsink.go:22) MOVQ CX, "".i-8(SP)
v36 00020 (.../spillsink.go:23) PCDATA $0, $0
v36 00021 (.../spillsink.go:23) CALL "".k(SB)
v31 00022 (.../spillsink.go:23) MOVQ "".p+8(SP), AX
v23 00023 (.../spillsink.go:23) MOVQ "".i-8(SP), CX
v35 00024 (.../spillsink.go:23) MOVQ "".p(SP), DX <-- this reload is good.
b14 00025 (.../spillsink.go:23) JMP 7
v39 00026 (.../spillsink.go:27) MOVB $0, "".~r1+24(SP)
b3 00027 (.../spillsink.go:27) RET
v26 00028 (.../spillsink.go:16) MOVB $1, "".~r1+24(SP)
b8 00029 (.../spillsink.go:16) RET
v16 00030 (.../spillsink.go:15) PCDATA $0, $1
v16 00031 (.../spillsink.go:15) CALL runtime.panicindex(SB)
b11 00032 (.../spillsink.go:15) UNDEF
00033 (<unknown line number>) END
genssa f2
00000 (.../spillsink.go:30) TEXT "".f2(SB)
00001 (.../spillsink.go:30) FUNCDATA $0, gclocals·...(SB)
00002 (.../spillsink.go:30) FUNCDATA $1, gclocals·...(SB)
v8 00003 (.../spillsink.go:30) MOVQ "".p+8(SP), AX
v27 00004 (.../spillsink.go:33) TESTQ AX, AX
b1 00005 (.../spillsink.go:33) JLE 19
v22 00006 (.../spillsink.go:33) MOVL $0, CX
v15 00007 (.../spillsink.go:35) CMPQ CX, AX
b5 00008 (.../spillsink.go:35) JCC 30
v37 00009 (.../spillsink.go:35) MOVQ "".p(SP), DX <-- this load should occur before the loop
v20 00010 (.../spillsink.go:35) MOVBLZX (DX)(CX*1), BX
v17 00011 (.../spillsink.go:35) TESTB BX, BX
b10 00012 (.../spillsink.go:35) JNE 28
v32 00013 (.../spillsink.go:39) MOVBLZX "".flag(SB), BX
v35 00014 (.../spillsink.go:39) TESTB BX, BX
b9 00015 (.../spillsink.go:39) JNE 21
v29 00016 (.../spillsink.go:38) INCQ CX
v31 00017 (.../spillsink.go:42) CMPQ CX, AX
b13 00018 (.../spillsink.go:42) JLT 7
v40 00019 (.../spillsink.go:47) MOVB $0, "".~r1+24(SP)
b3 00020 (.../spillsink.go:47) RET
v6 00021 (.../spillsink.go:47) MOVQ CX, "".i-8(SP)
v34 00022 (.../spillsink.go:40) PCDATA $0, $0
v34 00023 (.../spillsink.go:40) CALL "".k(SB)
v18 00024 (.../spillsink.go:40) MOVQ "".p+8(SP), AX
v23 00025 (.../spillsink.go:40) MOVQ "".i-8(SP), CX
v33 00026 (.../spillsink.go:40) MOVQ "".p(SP), DX <-- this reload is good.
b12 00027 (.../spillsink.go:40) JMP 16
v26 00028 (.../spillsink.go:36) MOVB $1, "".~r1+24(SP)
b8 00029 (.../spillsink.go:36) RET
v16 00030 (.../spillsink.go:35) PCDATA $0, $1
v16 00031 (.../spillsink.go:35) CALL runtime.panicindex(SB)
b11 00032 (.../spillsink.go:35) UNDEF
00033 (<unknown line number>) END
genssa f3
00000 (.../spillsink.go:50) TEXT "".f3(SB)
00001 (.../spillsink.go:50) FUNCDATA $0, gclocals·...(SB)
00002 (.../spillsink.go:50) FUNCDATA $1, gclocals·...(SB)
v22 00003 (.../spillsink.go:50) MOVL $0, AX
v43 00004 (.../spillsink.go:50) MOVQ "".p(SP), CX
b1 00005 (.../spillsink.go:51) JMP 8
v34 00006 (.../spillsink.go:51) INCQ CX
v37 00007 (.../spillsink.go:51) INCQ AX
v23 00008 (.../spillsink.go:51) MOVQ "".p+8(SP), DX <-- this load should occur before the loop
v6 00009 (.../spillsink.go:51) CMPQ AX, DX
b2 00010 (.../spillsink.go:51) JGE 27
v19 00011 (.../spillsink.go:51) MOVBLZX (CX), BX
v9 00012 (.../spillsink.go:52) TESTB BX, BX
b3 00013 (.../spillsink.go:52) JNE 25
v29 00014 (.../spillsink.go:55) MOVBLZX "".flag(SB), BX
v28 00015 (.../spillsink.go:55) TESTB BX, BX
b7 00016 (.../spillsink.go:55) JEQ 6
v7 00017 (.../spillsink.go:55) MOVQ AX, ""..autotmp_8-16(SP)
v11 00018 (.../spillsink.go:55) MOVQ CX, ""..autotmp_9-8(SP)
v31 00019 (.../spillsink.go:56) PCDATA $0, $1
v31 00020 (.../spillsink.go:56) CALL "".k(SB)
v15 00021 (.../spillsink.go:56) MOVQ ""..autotmp_8-16(SP), AX
v36 00022 (.../spillsink.go:56) MOVQ ""..autotmp_9-8(SP), CX
v33 00023 (.../spillsink.go:56) MOVQ "".p+8(SP), DX <-- this reload is good.
b8 00024 (.../spillsink.go:56) JMP 6
v26 00025 (.../spillsink.go:53) MOVB $1, "".~r1+24(SP)
b6 00026 (.../spillsink.go:53) RET
v40 00027 (.../spillsink.go:59) MOVB $0, "".~r1+24(SP)
b5 00028 (.../spillsink.go:59) RET
00029 (<unknown line number>) END
genssa f0
00000 (.../spillsink.go:62) TEXT "".f0(SB)
00001 (.../spillsink.go:62) FUNCDATA $0, gclocals·...(SB)
00002 (.../spillsink.go:62) FUNCDATA $1, gclocals·...(SB)
v8 00003 (.../spillsink.go:62) MOVQ "".p+8(SP), AX
v11 00004 (.../spillsink.go:65) TESTQ AX, AX
b1 00005 (.../spillsink.go:65) JLE 16
v22 00006 (.../spillsink.go:65) MOVQ "".p(SP), CX
v18 00007 (.../spillsink.go:65) MOVL $0, DX
v24 00008 (.../spillsink.go:67) CMPQ DX, AX
b5 00009 (.../spillsink.go:67) JCC 20
v20 00010 (.../spillsink.go:67) MOVBLZX (CX)(DX*1), BX
v9 00011 (.../spillsink.go:67) TESTB BX, BX
b10 00012 (.../spillsink.go:67) JNE 18
v29 00013 (.../spillsink.go:70) INCQ DX
v6 00014 (.../spillsink.go:71) CMPQ DX, AX
b9 00015 (.../spillsink.go:71) JLT 8
v34 00016 (.../spillsink.go:76) MOVB $0, "".~r1+24(SP)
b3 00017 (.../spillsink.go:76) RET
v26 00018 (.../spillsink.go:68) MOVB $1, "".~r1+24(SP)
b8 00019 (.../spillsink.go:68) RET
v16 00020 (.../spillsink.go:67) PCDATA $0, $1
v16 00021 (.../spillsink.go:67) CALL runtime.panicindex(SB)
b11 00022 (.../spillsink.go:67) UNDEF
00023 (<unknown line number>) END
This issue affects adding preemption checks to loops; the conditional call models the preemption check, which suffers the same extra load in the loop, and if the loop is tight it matters.
@randall77 @aclements
On f3 at least, I see what is going on.
When there is a call in the loop, we don't preload values into registers before jumping into the loop. p.len is such a value.
see cmd/compile/internal/ssa/regalloc.go:1433-1443
// If we are approaching a merge point and we are the primary
// predecessor of it, find live values that we use soon after
// the merge point and promote them to registers now.
if len(b.Succs) == 1 {
// For this to be worthwhile, the loop must have no calls in it.
top := b.Succs[0].b
loop := s.loopnest.b2l[top.ID]
if loop == nil || loop.header != top || loop.containsCall {
goto badloop
}
The logic there is if there's a call in the loop, we're going to have to load the value inside the loop anyway. No point in loading it before the loop starts as well.
This logic kinda falls down when the call is inside an if. Maybe we should keep this optimization if there is a call-free path around the loop?
This isn't a problem for the pointer value, because it is modified during the loop. It has a phi at the top of the loop that causes it to be allocated to a register.
I'm pretty sure that there's code (or there was code in some CL) that computed whether a loop contained a call-free path. I could see if I could find that somewhere.
That would be great if that code existed somewhere. I think just replacing containsCall with containsUnavoidableCall will do it.
Some branch hint information might be useful as well, maybe we ignore known unlikely paths when looking for a call, but likely (or unknown) paths count. Then it isn't just a possible call-free path, but no non-unlikely call-full path.
Do we need to know the blocks that are in the call-free paths? I couldn't find the code, but I'm pretty sure I can reconstitute it.
Nope. We only need the info on block entry. It would be enough to modify the semantics of the containsCall boolean. I don't think it is used anywhere else.
Changed milestone because this was motivated by the preemptible loop changes, which are not appearing in 1.10.
@dr2chase are you working on containsCall changes?
I've made a quick and dirty prototype to fix #22698 and I'd like to avoid effort duplication.
I am not working on that right now, I attempted something general and
didn't get it working.
Works for innermost loops is probably all we really need to get most of the
benefit.
On Thu, Nov 30, 2017 at 3:28 PM, Ilya Tocar notifications@github.com
wrote:
@dr2chase https://github.com/dr2chase are you working on containsCall
changes?
I've made a quick and dirty prototype to fix #22698
https://github.com/golang/go/issues/22698 and I'd like to avoid effort
duplication.—
You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub
https://github.com/golang/go/issues/22234#issuecomment-348311014, or mute
the thread
https://github.com/notifications/unsubscribe-auth/AB1vJ_SCSHX9S7u2ewlwKH2nlPlYUGyrks5s7w_zgaJpZM4P3STC
.
Change https://golang.org/cl/84055 mentions this issue: cmd/compile/internal/ssa: update regalloc in loops
@dr2chase David, did you ever try to reconstitute your code?
@TocarIP Ilya, do you think your CL is still the way forward?
I think Ilya's CL is the way forward. It is mostly there. If he doesn't have time to get to it, I am happy to look into it in a few weeks if that delay is okay. Right now I am working on debugging stuff.
No hurry, just want to make sure that something makes it into 1.11. We've seen enough incarnations of this issue that we should fix it.
I was planing to work on it last week, however I'm currently having problems[1] with my google account, once they are resolved I will push updated version.
1: "ilya.[email protected] has already been registered under a different account accessible by:
ilya.[email protected]"
@andybons, can you look into Ilya's Gerrit account problems?
I think my account was deleted and now I have new account with the same email. Now I see 2 ilya.[email protected] accounts at https://go-review.googlesource.com old one with all CL that I can't login into and new one that can login into, but can't +2/change my old CLs
On it. Filed internal bug.
@TocarIP OK you should be able to log in now.
@andybons Thanks, it works now!
@TocarIP @randall77 should this still be a release blocker for 1.11?
This is fixed by Ilya's CL of March 20.
Most helpful comment
@andybons Thanks, it works now!