E[08-04|03:49:20.775] CONSENSUS FAILURE!!! module=consensus err="should not already be unbonded, validator: {A7B3C7DA2964ED56C46142C8254B9249E17FD8FB PubKeyEd25519{11A7882002254A7995D7D5B94DFA2E6772AEA2C6A675B8DDF7E9FB8263D3245E} false 0 29/1 290/9 {Stone [do-not-modify] [do-not-modify] [do-not-modify]} 20127 0 0/1 0/1 0/1 0/1 0/1}"
stack="goroutine 1159 [running]:
runtime/debug.Stack(0xc4327dbb58, 0xd01500, 0xc4327a39e0)
/snap/go/2130/src/runtime/debug/stack.go:24 +0xa7
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).receiveRoutine.func1(0xc420110900)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:558 +0x57
panic(0xd01500, 0xc4327a39e0)
/snap/go/2130/src/runtime/panic.go:502 +0x229
github.com/cosmos/cosmos-sdk/x/stake/keeper.Keeper.unbondValidator(0x105dbe0, 0xc42001cb90, 0xc4200fc540, 0x105dbe0, 0xc42001cb60, 0xfe78b0, 0xc4200fc540, 0x4, 0x10658e0, 0xc42b3caab0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/x/stake/keeper/validator.go:487 +0x797
github.com/cosmos/cosmos-sdk/x/stake/keeper.Keeper.UpdateBondedValidators(0x105dbe0, 0xc42001cb90, 0xc4200fc540, 0x105dbe0, 0xc42001cb60, 0xfe78b0, 0xc4200fc540, 0x4, 0x10658e0, 0xc42b3caab0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/x/stake/keeper/validator.go:384 +0x867
github.com/cosmos/cosmos-sdk/x/stake/keeper.Keeper.UpdateValidator(0x105dbe0, 0xc42001cb90, 0xc4200fc540, 0x105dbe0, 0xc42001cb60, 0xfe78b0, 0xc4200fc540, 0x4, 0x10658e0, 0xc42b3caab0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/x/stake/keeper/validator.go:238 +0x6db
github.com/cosmos/cosmos-sdk/x/stake/keeper.Keeper.Slash(0x105dbe0, 0xc42001cb90, 0xc4200fc540, 0x105dbe0, 0xc42001cb60, 0xfe78b0, 0xc4200fc540, 0x4, 0x10658e0, 0xc42b3caab0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/x/stake/keeper/slash.go:103 +0xccc
github.com/cosmos/cosmos-sdk/x/slashing.Keeper.handleValidatorSignature(0x105dbe0, 0xc42001cbb0, 0xc4200fc540, 0x106bf80, 0xc420080880, 0xa, 0x10658e0, 0xc42b3caab0, 0xc425e5dc00, 0x9, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/x/slashing/keeper.go:106 +0x7ea
github.com/cosmos/cosmos-sdk/x/slashing.BeginBlocker(0x10658e0, 0xc42b3caab0, 0xc425e5dc00, 0x9, 0xc426ec51e0, 0x14, 0x20, 0xc4333eab50, 0x9, 0x10ea0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/x/slashing/tick.go:28 +0x1d8
github.com/cosmos/cosmos-sdk/cmd/gaia/app.(*GaiaApp).BeginBlocker(0xc42082c340, 0x10658e0, 0xc42b3caab0, 0xc425e5dc00, 0x9, 0xc426ec51e0, 0x14, 0x20, 0xc4333eab50, 0x9, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/cmd/gaia/app/app.go:131 +0xc3
github.com/cosmos/cosmos-sdk/cmd/gaia/app.(*GaiaApp).BeginBlocker-fm(0x10658e0, 0xc42b3caab0, 0xc425e5dc00, 0x9, 0xc426ec51e0, 0x14, 0x20, 0xc4333eab50, 0x9, 0x10ea0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/cmd/gaia/app/app.go:103 +0xa0
github.com/cosmos/cosmos-sdk/baseapp.(*BaseApp).BeginBlock(0xc4207e0000, 0xc426ec51e0, 0x14, 0x20, 0xc4333eab50, 0x9, 0x10ea0, 0x5b65223f, 0x0, 0xa7c, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/baseapp/baseapp.go:432 +0x1ef
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/abci/client.(*localClient).BeginBlockSync(0xc42008cd20, 0xc426ec51e0, 0x14, 0x20, 0xc4333eab50, 0x9, 0x10ea0, 0x5b65223f, 0x0, 0xa7c, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/abci/client/local_client.go:206 +0xab
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/proxy.(*appConnConsensus).BeginBlockSync(0xc4208c1350, 0xc426ec51e0, 0x14, 0x20, 0xc4333eab50, 0x9, 0x10ea0, 0x5b65223f, 0x0, 0xa7c, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/proxy/app_conn.go:69 +0x78
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/state.execBlockOnProxyApp(0x10664a0, 0xc421a7eaa0, 0x106b320, 0xc4208c1350, 0xc426b4c680, 0xc424556de0, 0x106f540, 0xc42000e080, 0x1, 0xc424766bc0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/state/execution.go:190 +0x53b
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/state.(*BlockExecutor).ApplyBlock(0xc421a18060, 0xc421c924d0, 0x9, 0x10e9f, 0xa7c, 0xc431912e40, 0x14, 0x20, 0x1, 0xc424766bc0, ...)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/state/execution.go:76 +0x12f
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).finalizeCommit(0xc420110900, 0x10ea0)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:1290 +0xba6
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).tryFinalizeCommit(0xc420110900, 0x10ea0)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:1221 +0x468
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).enterCommit.func1(0xc420110900, 0x0, 0x10ea0)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:1169 +0x98
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).enterCommit(0xc420110900, 0x10ea0, 0x0)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:1198 +0x802
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).addVote(0xc420110900, 0xc429435b80, 0xc427fd6060, 0x28, 0x17d3360, 0xc4369ddae0, 0x43e819)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:1601 +0xbb4
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).tryAddVote(0xc420110900, 0xc429435b80, 0xc427fd6060, 0x28, 0xfa, 0xf2)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:1459 +0x56
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).handleMsg(0xc420110900, 0xd3e500, 0xc427dcf318, 0xc427fd6060, 0x28)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:628 +0x64f
github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).receiveRoutine(0xc420110900, 0x0)
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:580 +0x6d2
created by github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).OnStart
/home/zaki/go/src/github.com/cosmos/cosmos-sdk/vendor/github.com/tendermint/tendermint/consensus/state.go:295 +0x140
"
I think I figured out the situation which might cause this - it should be resolved by https://github.com/cosmos/cosmos-sdk/pull/1858/files
Consider this situation:
I haven't been able to write a failing test for this one yet however
the cliff validator increases power from "5" to "10" but the cliff-power record is not properly updated
Can you explain this in more detail? What's the sequence around the oldCliffValidatorAddr variable? In your scenario, after step 2 ("6" bonded power), is the cliff validator the one that became 10? Then, a new validator bonds 7 and kicks out "6", wouldn't "10" still be bonded?
after step 2 ("6" bonded power), is the cliff validator the one that became 10?
In this situation the cliff validator should be 10; however, because the old cliff power wasn't updated correctly, the protocol may think that the cliff validator is actually the new validator, which has a power of "6".
Then, a new validator bonds 7 and kicks out "6", wouldn't "10" still be bonded?
Correct. However, if the "6" is being kicked out (which requests that this validator be unbonded), the protocol would panic because this validator was never bonded to begin with. The original cliff validator would remain bonded the whole time.
Here is a more verbose description of the same scenario. Again, I haven't been able to write a test for this which actually panics, which leads me to think that the real scenario is _not_ this one but maybe a close variant.
I think the postmortem also closes this? @cwgoes
Let's close it, because no one seems to have stored a blockchain for gaia-7004. It will be prevented in future testnets.
Most helpful comment
Looks like same bug.
Related: