BUG REPORT
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] file created at: 2017/05/15 14:47:45
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] running on machine: GTune59
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] binary: CockroachDB CCL v1.0 (linux amd64, built 2017/05/10 13:19:57, go1.8.1)
I170515 14:47:45.027210 1 util/log/clog.go:1011 [config] arguments: [cockroach start --insecure]
PostgreSQL 9.6.2

package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq" // registers the postgres driver
)

func main() {
	db, err := sql.Open(`postgres`, `user=billions dbname=billions sslmode=disable`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(`test1: postgresql`)
	// If the table already exists, reuse it after truncating.
	_, err = db.Exec(`CREATE TABLE test1 (id BIGSERIAL PRIMARY KEY, k TEXT UNIQUE, v TEXT)`)
	if err != nil {
		if _, err = db.Exec(`TRUNCATE TABLE test1`); err != nil {
			log.Fatal(err)
		}
	}
	max := 9999

	// Values are interpolated directly into the SQL; acceptable for a
	// benchmark with generated input, though real code should use placeholders.
	t := time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`INSERT INTO test1 (k, v) VALUES ('%05d', '%05d')`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("INSERT: %v\n", time.Since(t))

	t = time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`UPDATE test1 SET v = '%06d' WHERE k = '%05d'`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("UPDATE: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		// Forward scans with varying stride.
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test1 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		// Reverse scans with varying stride.
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test1 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		fmt.Print(`.`)
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))
}
package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq" // registers the postgres driver
)

func main() {
	db, err := sql.Open(`postgres`, `user=billions dbname=billions sslmode=disable`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(`test2: postgresql jsonb`)
	// If the table already exists, reuse it after truncating.
	_, err = db.Exec(`CREATE TABLE test2 (id BIGSERIAL PRIMARY KEY, k TEXT UNIQUE, v JSONB)`)
	if err != nil {
		if _, err = db.Exec(`TRUNCATE TABLE test2`); err != nil {
			log.Fatal(err)
		}
	}
	max := 9999

	t := time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`INSERT INTO test2 (k, v) VALUES ('%05d', '{"v":"%05d"}')`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("INSERT: %v\n", time.Since(t))

	t = time.Now()
	for x := 1; x < max; x++ {
		// jsonb_merge is not built into PostgreSQL; this assumes a
		// user-defined helper (stock jsonb values merge with the || operator).
		_, err = db.Exec(fmt.Sprintf(`UPDATE test2 SET v = jsonb_merge(v,'{"v":"%06d"}') WHERE k = '%05d'`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("UPDATE: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		// Forward scans with varying stride.
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test2 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		// Reverse scans with varying stride.
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test2 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		fmt.Print(`.`)
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))
}
package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq" // registers the postgres driver
)

func main() {
	db, err := sql.Open(`postgres`, `postgresql://test3@localhost:26257/test3?sslmode=disable`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(`test3: cockroach`)
	// If the table already exists, reuse it after truncating.
	_, err = db.Exec(`CREATE TABLE test3 (id BIGSERIAL PRIMARY KEY, k TEXT UNIQUE, v TEXT)`)
	if err != nil {
		if _, err = db.Exec(`TRUNCATE TABLE test3`); err != nil {
			log.Fatal(err)
		}
	}
	max := 9999

	t := time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`INSERT INTO test3 (k, v) VALUES ('%05d', '%05d')`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("INSERT: %v\n", time.Since(t))

	t = time.Now()
	for x := 1; x < max; x++ {
		_, err = db.Exec(fmt.Sprintf(`UPDATE test3 SET v = '%06d' WHERE k = '%05d'`, x, x))
		if err != nil {
			log.Fatal(err)
		}
		if x%100 == 0 {
			fmt.Print(`.`)
		}
	}
	fmt.Printf("UPDATE: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		// Forward scans with varying stride.
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		// Reverse scans with varying stride.
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
		fmt.Print(`.`)
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))
}
test1: postgresql
INSERT: 35.613299388s
UPDATE: 36.34266642s
SELECT: 7.765492499s
CPU: 2.64s Real: 80.03s RAM: 54 912 KB
test2: postgresql jsonb
INSERT: 33.829729621s
UPDATE: 37.909535623s
SELECT: 8.136418723s
CPU: 2.55s Real: 80.15s RAM: 56 076 KB
test3: cockroach
INSERT: 1m21.098243074s
UPDATE: 1m27.374010505s
SELECT: 22m59.672499752s
CPU: 7.13s Real: 1550.45s RAM: 56 648 KB
@kokizzu Thanks for the report and test code. I'll take a look at this shortly. Just to clarify, you were running a 1 node cockroach cluster, correct?
Yes, just one node
I'm still looking at the performance diff for the INSERT and UPDATE cases. For the SELECT case, the schema is forcing the use of an index scan and then an index join to find the row data (i.e. id and v).
root@:26257/> EXPLAIN (PLAN) SELECT * FROM test3.test3 WHERE k >= '00020' ORDER BY k ASC LIMIT 20;
+-------+------------+-------+-------------------+
| Level | Type | Field | Description |
+-------+------------+-------+-------------------+
| 0 | limit | | |
| 1 | index-join | | |
| 2 | scan | | |
| 2 | | table | test3@test3_k_key |
| 2 | | spans | /"00020"- |
| 2 | | limit | 20 |
| 2 | scan | | |
| 2 | | table | test3@primary |
+-------+------------+-------+-------------------+
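For intuition, here is what that plan does in spirit: every row matched on the secondary index costs a second lookup against the primary index to materialize the columns the index does not store. This is a hypothetical Go sketch with invented types and names, not CockroachDB's executor code:

package main

import "fmt"

// indexEntry models a row of the secondary index test3_k_key: it holds k
// plus the primary key id, which secondary indexes carry implicitly.
type indexEntry struct {
	k  string
	id int64
}

// indexJoin models the plan above: scan the index for matching keys, then
// look each id up in the primary index to fetch v.
func indexJoin(matches []indexEntry, primary map[int64]string, limit int) []string {
	var out []string
	for _, e := range matches {
		if len(out) == limit {
			break
		}
		out = append(out, primary[e.id]) // one extra lookup per matched row
	}
	return out
}

func main() {
	primary := map[int64]string{1: "00020", 2: "00021"}
	matches := []indexEntry{{"00020", 1}, {"00021", 2}}
	fmt.Println(indexJoin(matches, primary, 20))
}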
@RaduBerinde or @knz Can you investigate why this is so horrifically slow?
@kokizzu Is there any reason you're using id BIGSERIAL for your primary key? Using k TEXT for your primary key would avoid the index join mentioned above.
@petermattis Hi, I normally have two keys: the first is the primary key (a bigint that is joined to other tables), and the second is a unique text column for human-memorable lookup (for example: student ID, subject ID, FCM unique ID, IMEI, or any other "key" that could change). Before I convert my PostgreSQL-based project to either CockroachDB or ScyllaDB (my next project needs massive-write capability), I need to test the basic performance of these simple cases.
@RaduBerinde, @knz --vmodule=rowfetcher=2,index_join=3 shows that we're retrieving 100 keys from test3_k_key and doing a lookup in the primary table for all 100, despite the limit being set to 20. This lookup of 100 keys seems to be independent of the limit: using LIMIT 1 also results in 100 keys being retrieved from the index.
Yes, correct: the index join currently has a batch size of 100 rows.
Since the case where LIMIT is smaller than 100 is likely very common, it's worthwhile having a special case for that.
I'll make the PR with that change.
The index join batch size was only intended to control how many rows were fetched from the index before looking up in the primary. I don't believe that value propagates anywhere else.
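A minimal sketch of the special case proposed above (hypothetical names, not the actual CockroachDB code): clamp the index-join batch size to the scan's limit whenever the limit is smaller than the default.

package main

import "fmt"

const indexJoinBatchSize = 100 // default number of index rows fetched per lookup batch

// effectiveBatchSize clamps the batch to the scan's limit; limit <= 0
// means the scan is unlimited and keeps the default.
func effectiveBatchSize(limit int64) int64 {
	if limit > 0 && limit < indexJoinBatchSize {
		return limit
	}
	return indexJoinBatchSize
}

func main() {
	fmt.Println(effectiveBatchSize(20)) // 20: a LIMIT 20 query fetches only 20 index rows
	fmt.Println(effectiveBatchSize(0))  // 100: unlimited scans keep the default
}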
Good catch. I had been looking and everything seemed as expected in terms of query plans (we are using the index for the reverse version of the query as well, and these queries aren't going through distsql).
Ah, @knz might be correct that index-join is the problem. When evaluating the query we do 2 scans from the index, the first for 21 keys and the second for 1000 keys. I'm not clear why we're not actually retrieving more than 100 keys.
@petermattis can you explain how you observed the two scans? I instrumented the code and I can see one batch on system.descriptor for 10000 rows, one batch on the index for 21 rows and 20 batches on the table with no row limit but a key range that should only match 1 row.
diff --git a/pkg/sql/sqlbase/kvfetcher.go b/pkg/sql/sqlbase/kvfetcher.go
index 035613d..3c4da81 100644
--- a/pkg/sql/sqlbase/kvfetcher.go
+++ b/pkg/sql/sqlbase/kvfetcher.go
@@ -27,6 +27,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/internal/client"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/util/log"
 )
 
 // PrettyKey pretty-prints the specified key, skipping over the first `skip`
@@ -215,6 +216,8 @@ func makeKVFetcher(
 func (f *kvFetcher) fetch(ctx context.Context) error {
 	batchSize := f.getBatchSize()
+	log.Infof(ctx, "kvFetcher.fetch %s: batchSize=%d", f.spans[0], batchSize)
+
 	b := &f.batch
 	*b = client.Batch{}
 	b.Header.MaxSpanRequestKeys = batchSize
Sample output:
I170515 11:59:42.250439 612 sql/executor.go:1153 [client=[::1]:49642,user=root,n1] SELECT * FROM test3 WHERE k >= '09410' ORDER BY k ASC LIMIT 20
I170515 11:59:42.250531 612 sql/sqlbase/kvfetcher.go:219 [client=[::1]:49642,user=root,n1] kvFetcher.fetch /Table/51/{2/"09410"-3}: batchSize=21
I170515 11:59:42.251112 612 sql/sqlbase/kvfetcher.go:219 [client=[::1]:49642,user=root,n1] kvFetcher.fetch /Table/51/{2/"09431"/0-3}: batchSize=1000
I170515 11:59:42.253133 612 sql/sqlbase/kvfetcher.go:219 [client=[::1]:49642,user=root,n1] kvFetcher.fetch /Table/51/1/2450835746876948{49-50}: batchSize=0
Ack, I was mistakenly looking at the reverse select.
FWIW I altered the test program to split the forward and reverse selects, and the problem is in the forward case:
test3: cockroach
INSERT: 526.692321ms
UPDATE: 529.812044ms
SELECT: 29.296383294s
SELECT REV: 2.395376952s
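For reference, the split presumably just times the two inner loops separately. A sketch along these lines, reusing the database/sql, fmt, log, and time imports from the programs above:

// benchSelects times the forward and reverse scans independently so each
// direction gets its own line of output.
func benchSelects(db *sql.DB, max int) {
	t := time.Now()
	for y := 2; y < 39; y++ {
		for x := max - 1; x > 0; x -= y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k >= '%05d' ORDER BY k ASC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
	}
	fmt.Printf("SELECT: %v\n", time.Since(t))

	t = time.Now()
	for y := 2; y < 39; y++ {
		for x := 1; x < max; x += y {
			rows, err := db.Query(fmt.Sprintf(`SELECT * FROM test3 WHERE k <= '%05d' ORDER BY k DESC LIMIT 20`, x))
			if err != nil {
				log.Fatal(err)
			}
			for rows.Next() {
			}
			rows.Close()
		}
	}
	fmt.Printf("SELECT REV: %v\n", time.Since(t))
}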
Ah, we're actually retrieving 1000 keys for the second scan from the index, but only printing 100 with --vmodule=rowfetcher=2 because that is the number of calls to RowFetcher.ProcessKV.
FWIW, with EXPLAIN(TRACE) the query behaves properly, and without EXPLAIN it behaves wrongly. There's something fishy in there :)
@knz Do you have some local edits going on? I changed max to 999 and see:
test3: cockroach
.........INSERT: 473.282646ms
.........UPDATE: 686.924628ms
.....................................SELECT: 8.170576229s
.....................................SELECT REV: 24.563203483s
If I locally set indexJoinBatchSize to 20, I see:
test3: cockroach
.........INSERT: 472.581092ms
.........UPDATE: 691.129455ms
.....................................SELECT: 2.520680796s
.....................................SELECT REV: 5.389766717s
I have no local edits, but I am using the master branch, not the 1.0 release.
I found a likely cause: the current index join code will happily request more rows from the index scanner beyond the 21 initially fetched, and since the second batch is reinitialized, it does not carry over the limit set on the scan node and uses the default instead.
See #15925 for a tentative fix.
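A simplified illustration of that failure mode (entirely hypothetical names; the real fix is in #15925): the first batch honors the limit from the scan node, but re-initializing the fetcher for the next batch silently falls back to the default.

package main

import "fmt"

const defaultBatchSize = 1000 // used when no limit is carried over

// fetcher mimics the suspected behavior: only the first batch remembers
// the scan node's limit.
type fetcher struct {
	firstBatchLimit int64 // e.g. 21 for LIMIT 20 (one extra to detect more rows)
	fetchedFirst    bool
}

func (f *fetcher) nextBatchSize() int64 {
	if !f.fetchedFirst {
		f.fetchedFirst = true
		return f.firstBatchLimit
	}
	// Bug: the limit is not propagated to re-initialized batches, so the
	// second request asks for the full default of 1000 keys.
	return defaultBatchSize
}

func main() {
	f := &fetcher{firstBatchLimit: 21}
	fmt.Println(f.nextBatchSize()) // 21, as seen in the log output above
	fmt.Println(f.nextBatchSize()) // 1000, the unexpected second batch
}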
FYI, on my machine before that patch:
test3: cockroach
.........INSERT: 2.870132161s
.........UPDATE: 3.436381406s
.....................................SELECT: 2m26.094474427s
.....................................SELECT REV: 54.633160055s
After the patch:
test3: cockroach
.........INSERT: 3.344008224s
.........UPDATE: 3.44719187s
.....................................SELECT: 9.272870882s
.....................................SELECT REV: 6.414932226s
I am curious about the difference between forward and reverse. Why is one so much more affected by this change? It seems like in both cases we would request a second batch of 1,000.
And I'm very confused about how SELECT REV could possibly be faster than SELECT. It never is in my testing.
I don't know how to explain this, sorry. I promise this is just a plain build on my machine...
Anyway, #15925 is merged and will greatly alleviate the performance mismatch reported initially.
@kokizzu With Postgres, primary key vs non-primary key indexes are essentially the same speed. With CockroachDB, primary key indexes are significantly faster to access unless you ensure the non-primary key index contains all of the data you are querying. After #15925, I see the following numbers using test3 (with a change to separate the forward and reverse scan numbers):
test3: cockroach
...................................................................................................INSERT: 4.687891145s
...................................................................................................UPDATE: 7.2442223s
.....................................SELECT: 26.402950452s
.....................................SELECT REV: 55.780732459s
If I change the table definition to CREATE TABLE test3 (id BIGSERIAL UNIQUE, k TEXT PRIMARY KEY, v TEXT), the numbers become:
test3: cockroach
...................................................................................................INSERT: 4.670398907s
...................................................................................................UPDATE: 6.273768495s
.....................................SELECT: 11.77348784s
.....................................SELECT REV: 25.385859572s
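A third option, if both lookup paths need to stay fast, would be a covering index. This is a sketch assuming CockroachDB's STORING clause (index name invented here), dropped into the test program's main in place of the original CREATE TABLE:

// Keep id as the primary key, but store v in the unique index on k so
// SELECT * can be served from that index without an index join
// (secondary indexes already carry the primary key column id).
_, err = db.Exec(`CREATE TABLE test3 (
	id BIGSERIAL PRIMARY KEY,
	k TEXT,
	v TEXT,
	UNIQUE INDEX test3_k_key (k) STORING (v)
)`)
if err != nil {
	log.Fatal(err)
}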
Is there more work to be done here? Perhaps we should file follow-up issues to improve specifics, but I don't know how valuable it is to keep this open.
I'm not aware of anything else to do here. The egregious performance difference was fixed.