BUG REPORT
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] file created at: 2017/05/15 14:47:45
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] running on machine: GTune59
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] binary: CockroachDB CCL v1.0 (linux amd64, built 2017/05/10 13:19:57, go1.8.1)
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] arguments: [cockroach start --insecure]
PostgreSQL 9.6.2

package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq" // registers the postgres driver
)

func main() {
	db, err := sql.Open(`postgres`, `user=billions dbname=billions sslmode=disable`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(`test1: postgresql`)
	// If the table already exists, reuse it after truncating.
	_, err = db.Exec(`CREATE TABLE test1 (id BIGSERIAL PRIMARY KEY, k TEXT UNIQUE, v TEXT)`)
	if err != nil {
		if _, err = db.Exec(`TRUNCATE TABLE test1`); err != nil {
			log.Fatal(err)
		}
	}
	max := 9999

	// Values are interpolated directly into the SQL; acceptable for a
	// benchmark with generated input, though real code should use placeholders.
	t := time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`INSERT INTO test1 (k, v) VALUES ('%05d', '%05d')`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("INSERT: %v\n", time.Since(t))

	t = time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`UPDATE test1 SET v = '%06d' WHERE k = '%05d'`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("UPDATE: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		// Forward scans with varying stride.
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test1 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		// Reverse scans with varying stride.
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test1 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		fmt.Print(`.`)
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))
}
package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq" // registers the postgres driver
)

func main() {
	db, err := sql.Open(`postgres`, `user=billions dbname=billions sslmode=disable`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(`test2: postgresql jsonb`)
	// If the table already exists, reuse it after truncating.
	_, err = db.Exec(`CREATE TABLE test2 (id BIGSERIAL PRIMARY KEY, k TEXT UNIQUE, v JSONB)`)
	if err != nil {
		if _, err = db.Exec(`TRUNCATE TABLE test2`); err != nil {
			log.Fatal(err)
		}
	}
	max := 9999

	t := time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`INSERT INTO test2 (k, v) VALUES ('%05d', '{"v":"%05d"}')`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("INSERT: %v\n", time.Since(t))

	t = time.Now()
	for x := 1; x < max; x++ {
		// jsonb_merge is not built into PostgreSQL; this assumes a
		// user-defined helper (stock jsonb values merge with the || operator).
		_, err = db.Exec(fmt.Sprintf(`UPDATE test2 SET v = jsonb_merge(v,'{"v":"%06d"}') WHERE k = '%05d'`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("UPDATE: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		// Forward scans with varying stride.
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test2 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		// Reverse scans with varying stride.
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test2 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		fmt.Print(`.`)
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))
}
package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq" // registers the postgres driver
)

func main() {
	db, err := sql.Open(`postgres`, `postgresql://test3@localhost:26257/test3?sslmode=disable`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(`test3: cockroach`)
	// If the table already exists, reuse it after truncating.
	_, err = db.Exec(`CREATE TABLE test3 (id BIGSERIAL PRIMARY KEY, k TEXT UNIQUE, v TEXT)`)
	if err != nil {
		if _, err = db.Exec(`TRUNCATE TABLE test3`); err != nil {
			log.Fatal(err)
		}
	}
	max := 9999

	t := time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`INSERT INTO test3 (k, v) VALUES ('%05d', '%05d')`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("INSERT: %v\n", time.Since(t))

	t = time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`UPDATE test3 SET v = '%06d' WHERE k = '%05d'`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("UPDATE: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		// Forward scans with varying stride.
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		// Reverse scans with varying stride.
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		fmt.Print(`.`)
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))
}
test1: postgresql
INSERT: 35.613299388s
UPDATE: 36.34266642s
SELECT: 7.765492499s
CPU: 2.64s Real: 80.03s RAM: 54 912 KB
test2: postgresql jsonb
INSERT: 33.829729621s
UPDATE: 37.909535623s
SELECT: 8.136418723s
CPU: 2.55s Real: 80.15s RAM: 56 076 KB
test3: cockroach
INSERT: 1m21.098243074s
UPDATE: 1m27.374010505s
SELECT: 22m59.672499752s
CPU: 7.13s Real: 1550.45s RAM: 56 648 KB
@kokizzu Thanks for the report and test code. I'll take a look at this shortly. Just to clarify, you were running a 1 node cockroach cluster, correct?
Yes, just one node
I'm still looking at the performance diff for the INSERT and UPDATE cases. For the SELECT case, the schema is forcing the use of an index scan and then an index join to find the row data (i.e. id and v).
root@:26257/> EXPLAIN (PLAN) SELECT * FROM test3.test3 WHERE k >= '00020' ORDER BY k ASC LIMIT 20;
+-------+------------+-------+-------------------+
| Level | Type | Field | Description |
+-------+------------+-------+-------------------+
| 0 | limit | | |
| 1 | index-join | | |
| 2 | scan | | |
| 2 | | table | test3@test3_k_key |
| 2 | | spans | /"00020"- |
| 2 | | limit | 20 |
| 2 | scan | | |
| 2 | | table | test3@primary |
+-------+------------+-------+-------------------+
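For intuition, here is what that plan does in spirit: every row matched on the secondary index costs a second lookup against the primary index to materialize the columns the index does not store. This is a hypothetical Go sketch with invented types and names, not CockroachDB's executor code:

package main

import "fmt"

// indexEntry models a row of the secondary index test3_k_key: it holds k
// plus the primary key id, which secondary indexes carry implicitly.
type indexEntry struct {
	k  string
	id int64
}

// indexJoin models the plan above: scan the index for matching keys, then
// look each id up in the primary index to fetch v.
func indexJoin(matches []indexEntry, primary map[int64]string, limit int) []string {
	var out []string
	for _, e := range matches {
		if len(out) == limit {
			break
		}
		out = append(out, primary[e.id]) // one extra lookup per matched row
	}
	return out
}

func main() {
	primary := map[int64]string{1: "00020", 2: "00021"}
	matches := []indexEntry{{"00020", 1}, {"00021", 2}}
	fmt.Println(indexJoin(matches, primary, 20))
}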
@RaduBerinde or @knz Can you investigate why this is so horrifically slow?
@kokizzu Is there any reason you're using id BIGSERIAL for your primary key? Using k TEXT for your primary key would avoid the index join mentioned above.
@petermattis Hi, I normally have two keys: the first is the primary key (a bigint that is joined to other tables), and the second is a unique text column for human-memorable lookup (for example: student ID, subject ID, FCM unique ID, IMEI, or any other "key" that could change). Before I convert my PostgreSQL-based project to either CockroachDB or ScyllaDB (my next project needs massive-write capability), I need to test the basic performance of these simple cases.
@RaduBerinde, @knz --vmodule=rowfetcher=2,index_join=3 shows that we're retrieving 100 keys from test3_k_key and doing a lookup in the primary table for all 100, despite the limit being set to 20. This lookup of 100 keys seems to be independent of the limit: using LIMIT 1 also results in 100 keys being retrieved from the index.
Yes, correct: the index join currently has a batch size of 100 rows.
Since the case where LIMIT is smaller than 100 is likely very common, it's worthwhile having a special case for that.
I'll make the PR with that change.
The index join batch size was only intended to control how many rows were fetched from the index before looking up in the primary. I don't believe that value propagates anywhere else.
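A minimal sketch of the special case proposed above (hypothetical names, not the actual CockroachDB code): clamp the index-join batch size to the scan's limit whenever the limit is smaller than the default.

package main

import "fmt"

const indexJoinBatchSize = 100 // default number of index rows fetched per lookup batch

// effectiveBatchSize clamps the batch to the scan's limit; limit <= 0
// means the scan is unlimited and keeps the default.
func effectiveBatchSize(limit int64) int64 {
	if limit > 0 && limit < indexJoinBatchSize {
		return limit
	}
	return indexJoinBatchSize
}

func main() {
	fmt.Println(effectiveBatchSize(20)) // 20: a LIMIT 20 query fetches only 20 index rows
	fmt.Println(effectiveBatchSize(0))  // 100: unlimited scans keep the default
}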
Good catch. I had been looking and everything seemed as expected in terms of query plans (we are using the index for the reverse version of the query as well, and these queries aren't going through distsql).
Ah, @knz might be correct that index-join is the problem. When evaluating the query we do 2 scans from the index, the first for 21 keys and the second for 1000 keys. I'm not clear why we're not actually retrieving more than 100 keys.
@petermattis can you explain how you observed the two scans? I instrumented the code and I can see one batch on system.descriptor for 10000 rows, one batch on the index for 21 rows and 20 batches on the table with no row limit but a key range that should only match 1 row.
diff --git a/pkg/sql/sqlbase/kvfetcher.go b/pkg/sql/sqlbase/kvfetcher.go
index 035613d..3c4da81 100644
--- a/pkg/sql/sqlbase/kvfetcher.go
+++ b/pkg/sql/sqlbase/kvfetcher.go
@@ -27,6 +27,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/internal/client"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/util/log"
 )
 
 // PrettyKey pretty-prints the specified key, skipping over the first `skip`
@@ -215,6 +216,8 @@ func makeKVFetcher(
 func (f *kvFetcher) fetch(ctx context.Context) error {
 	batchSize := f.getBatchSize()
+	log.Infof(ctx, "kvFetcher.fetch %s: batchSize=%d", f.spans[0], batchSize)
+
 	b := &f.batch
 	*b = client.Batch{}
 	b.Header.MaxSpanRequestKeys = batchSize
Sample output:
I170515 11:59:42.250439 612 sql/executor.go:1153 [client=[::1]:49642,user=root,n1] SELECT * FROM test3 WHERE k >= '09410' ORDER BY k ASC LIMIT 20
I170515 11:59:42.250531 612 sql/sqlbase/kvfetcher.go:219 [client=[::1]:49642,user=root,n1] kvFetcher.fetch /Table/51/{2/"09410"-3}: batchSize=21
I170515 11:59:42.251112 612 sql/sqlbase/kvfetcher.go:219 [client=[::1]:49642,user=root,n1] kvFetcher.fetch /Table/51/{2/"09431"/0-3}: batchSize=1000
I170515 11:59:42.253133 612 sql/sqlbase/kvfetcher.go:219 [client=[::1]:49642,user=root,n1] kvFetcher.fetch /Table/51/1/2450835746876948{49-50}: batchSize=0
Ack, I was mistakenly looking at the reverse select.
FWIW I altered the test program to split the forward and reverse selects, and the problem is in the forward case:
test3: cockroach
INSERT: 526.692321ms
UPDATE: 529.812044ms
SELECT: 29.296383294s
SELECT REV: 2.395376952s
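For reference, the split presumably just times the two inner loops separately. A sketch along these lines, reusing the database/sql, fmt, log, and time imports from the programs above:

// benchSelects times the forward and reverse scans independently so each
// direction gets its own line of output.
func benchSelects(db *sql.DB, max int) {
	t := time.Now()
	for y := 2; y < 39; y++ {
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
	}
	fmt.Printf("SELECT REV: %v\n", time.Since(t))
}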
Ah, we're actually retrieving 1000 keys for the second scan from the index, but only printing 100 with --vmodule=rowfetcher=2 because that is the number of calls to RowFetcher.ProcessKV.
FWIW, with EXPLAIN(TRACE) the query behaves properly, and without EXPLAIN it behaves wrongly. There's something fishy in there :)
@knz Do you have some local edits going on? I changed max to 999 and see:
test3: cockroach
.........INSERT: 473.282646ms
.........UPDATE: 686.924628ms
.....................................SELECT: 8.170576229s
.....................................SELECT REV: 24.563203483s
If I locally set indexJoinBatchSize to 20, I see:
test3: cockroach
.........INSERT: 472.581092ms
.........UPDATE: 691.129455ms
.....................................SELECT: 2.520680796s
.....................................SELECT REV: 5.389766717s
I have no local edits, but I am using the master branch, not the 1.0 release.
I found a likely cause: the current index join code will happily request more rows from the index scanner beyond the 21 initially fetched, and since the second batch is reinitialized, it does not carry over the limit set on the scan node and uses the default instead.
See #15925 for a tentative fix.
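A simplified illustration of that failure mode (entirely hypothetical names; the real fix is in #15925): the first batch honors the limit from the scan node, but re-initializing the fetcher for the next batch silently falls back to the default.

package main

import "fmt"

const defaultBatchSize = 1000 // used when no limit is carried over

// fetcher mimics the suspected behavior: only the first batch remembers
// the scan node's limit.
type fetcher struct {
	firstBatchLimit int64 // e.g. 21 for LIMIT 20 (one extra to detect more rows)
	fetchedFirst    bool
}

func (f *fetcher) nextBatchSize() int64 {
	if !f.fetchedFirst {
		f.fetchedFirst = true
		return f.firstBatchLimit
	}
	// Bug: the limit is not propagated to re-initialized batches, so the
	// second request asks for the full default of 1000 keys.
	return defaultBatchSize
}

func main() {
	f := &fetcher{firstBatchLimit: 21}
	fmt.Println(f.nextBatchSize()) // 21, as seen in the log output above
	fmt.Println(f.nextBatchSize()) // 1000, the unexpected second batch
}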
FYI, on my machine before that patch:
test3: cockroach
.........INSERT: 2.870132161s
.........UPDATE: 3.436381406s
.....................................SELECT: 2m26.094474427s
.....................................SELECT REV: 54.633160055s
After the patch:
test3: cockroach
.........INSERT: 3.344008224s
.........UPDATE: 3.44719187s
.....................................SELECT: 9.272870882s
.....................................SELECT REV: 6.414932226s
I am curious about the difference between forward and reverse. Why is one so much more affected by this change? It seems like in both cases we would request a second batch of 1,000.
And I'm very confused about how SELECT REV could possibly be faster than SELECT. It never is in my testing.
I don't know how to explain this, sorry. I promise this is just a plain build on my machine...
Anyway, #15925 is merged and will greatly alleviate the performance mismatch reported initially.
@kokizzu With Postgres, primary key vs non-primary key indexes are essentially the same speed. With CockroachDB, primary key indexes are significantly faster to access unless you ensure the non-primary key index contains all of the data you are querying. After #15925, I see the following numbers using test3 (with a change to separate the forward and reverse scan numbers):
test3: cockroach
...................................................................................................INSERT: 4.687891145s
...................................................................................................UPDATE: 7.2442223s
.....................................SELECT: 26.402950452s
.....................................SELECT REV: 55.780732459s
If I change the table definition to CREATE TABLE test3 (id BIGSERIAL UNIQUE, k TEXT PRIMARY KEY, v TEXT), the numbers become:
test3: cockroach
...................................................................................................INSERT: 4.670398907s
...................................................................................................UPDATE: 6.273768495s
.....................................SELECT: 11.77348784s
.....................................SELECT REV: 25.385859572s
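A third option, if both lookup paths need to stay fast, would be a covering index. This is a sketch assuming CockroachDB's STORING clause (index name invented here), dropped into the test program's main in place of the original CREATE TABLE:

// Keep id as the primary key, but store v in the unique index on k so
// SELECT * can be served from that index without an index join
// (secondary indexes already carry the primary key column id).
_, err = db.Exec(`CREATE TABLE test3 (
	id BIGSERIAL PRIMARY KEY,
	k TEXT,
	v TEXT,
	UNIQUE INDEX test3_k_key (k) STORING (v)
)`)
if err != nil {
	log.Fatal(err)
}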
Is there more work to be done here? Perhaps we should file follow-up issues to improve specifics, but I don't know how valuable it is to keep this open.
I'm not aware of anything else to do here. The egregious performance difference was fixed.