when query in grafana

run in docker , using demo docker-copose.yml
version: "3"
networks:
loki:
services:
loki:
image: grafana/loki:master
ports:
- "3100:3100"
command: -config.file=/etc/loki/local-config.yaml
networks:
- loki
grafana:
image: grafana/grafana:master
ports:
- "3000:3000"
environment:
GF_EXPLORE_ENABLED: "true"
networks:
- loki
promtail docker-compose.yml
version: "3"
networks:
loki:
services:
promtail:
image: grafana/promtail:master
container_name: tail
volumes:
- /data/nspm:/data/nspm
- ./docker-config.yaml:/etc/promtail/docker-config.yaml
command:
-config.file=/etc/promtail/docker-config.yaml
networks:
- loki
docker-config.yaml
server:
http_listen_port: 0
grpc_listen_port: 0
positions:
filename: /tmp/positions.yaml
client:
url: http://worksite.snc.360es.cn:3100/api/prom/push
scrape_configs:
- job_name: system
entry_parser: raw
static_configs:
- targets:
- localhost
labels:
job: logs
appname: nspm-gateway
__path__: /data/nspm/nspm-gateway
error log
level=info ts=2018-12-29T08:15:16.326097187Z caller=lifecycler.go:290 msg="auto-joining cluster after timeout"
level=warn ts=2018-12-29T08:15:16.46305657Z caller=gokit.go:43 msg="POST /api/prom/push (500) 316.146碌s Response: \"empty ring\\n\" ws: false; Accept-Encoding: gzip; Content-Length: 3603; Con
tent-Type: application/x-protobuf; User-Agent: Go-http-client/1.1; "
ts=2018-12-29T08:16:45.564717815Z caller=spanlogger.go:36 org_id=fake method=SeriesStore.Get level=error msg=lookupChunksBySeries err="invalid chunk time range value: 656432313632626636653a31
363766616439306437393a31363766616439306437393a343931303839643700330066616b653a6431373839343a667544663954693452"
level=warn ts=2018-12-29T08:16:50.026345336Z caller=gokit.go:43 msg="POST /api/prom/push (500) 5.003725029s Response: \"rpc error: code = DeadlineExceeded desc = context deadline exceeded\\n\
" ws: false; Accept-Encoding: gzip; Content-Length: 21234; Content-Type: application/x-protobuf; User-Agent: Go-http-client/1.1; "
fatal error: runtime: out of memory
runtime stack:
runtime.throw(0x117f073, 0x16)
/usr/local/go/src/runtime/panic.go:608 +0x72
runtime.sysMap(0xc174000000, 0x5c000000, 0x1c78818)
/usr/local/go/src/runtime/mem_linux.go:156 +0xc7
runtime.(*mheap).sysAlloc(0x1c5f000, 0x5c000000, 0x80008c7378, 0xc0005a9e30)
/usr/local/go/src/runtime/malloc.go:619 +0x1c7
runtime.(*mheap).grow(0x1c5f000, 0x2df77, 0x0)
/usr/local/go/src/runtime/mheap.go:920 +0x42
runtime.(*mheap).allocSpanLocked(0x1c5f000, 0x2df77, 0x1c78828, 0x0)
/usr/local/go/src/runtime/mheap.go:848 +0x337
runtime.(*mheap).alloc_m(0x1c5f000, 0x2df77, 0xffffffffffff0101, 0xc0005a9f10)
/usr/local/go/src/runtime/mheap.go:692 +0x119
runtime.(*mheap).alloc.func1()
/usr/local/go/src/runtime/mheap.go:759 +0x4c
runtime.(*mheap).alloc(0x1c5f000, 0x2df77, 0xc000000101, 0x424fd0)
/usr/local/go/src/runtime/mheap.go:758 +0x8a
runtime.largeAlloc(0x5beee000, 0x450100, 0xc052540000)
/usr/local/go/src/runtime/malloc.go:1019 +0x97
runtime.mallocgc.func1()
/usr/local/go/src/runtime/malloc.go:914 +0x46
runtime.systemstack(0x1)
/usr/local/go/src/runtime/asm_amd64.s:351 +0x66
runtime.mstart()
/usr/local/go/src/runtime/proc.go:1229
level=warn ts=2018-12-29T08:14:43.869457718Z caller=gokit.go:46 method=/logproto.Pusher/Push duration=148.948碌s request="&PushRequest{Streams:[&Stream{Labels:{__filename__=\"/data/nspm/nspm-gateway/gateway-2018-12-30.log\", appname=\"nspm-gateway\", job=\"logs\"},Entries:[{2018-12-29 08:14:42.819230203 +0000 UTC 2018-12-29 16:14:42.813 [RxNewThreadScheduler-6] INFO cn.b360.irp.device.data.producer.service.DeviceDataSyncService.[addDeviceData] Line: 115 - Add device data.[deviceId=UviiQnYz6ve2Jn2EF3FbIzai, version=1]} {2018-12-29 08:14:42.819235307 +0000 UTC 2018-12-29 16:14:42.813 [RxNewThreadScheduler-6] INFO cn.b360.irp.device.data.producer.service.DeviceDataSyncService.[saveAllData] Line: 70 - Device[UviiQnYz6ve2Jn2EF3FbIzai] is unlocked[java.util.concurrent.locks.ReentrantLock@6c8577d4[Unlocked]].}],}],}" err="Entry out of order" msg="gRPC\n"
level=warn ts=2018-12-29T08:14:43.869911357Z caller=gokit.go:43 msg="POST /api/prom/push (500) 1.784836ms Response: \"rpc error: code = Unknown desc = Entry out of order\\n\" ws: false; Accept-Encoding: gzip; Content-Length: 421; Content-Type: application/x-protobuf; User-Agent: Go-http-client/1.1; "
level=info ts=2018-12-29T08:15:16.317227949Z caller=loki.go:121 msg=initialising module=store
level=info ts=2018-12-29T08:15:16.317346632Z caller=override.go:33 msg="per-tenant overides disabled"
level=info ts=2018-12-29T08:15:16.31765739Z caller=loki.go:121 msg=initialising module=server
level=info ts=2018-12-29T08:15:16.324060523Z caller=gokit.go:36 http=[::]:3100 grpc=[::]:9095 msg="server listening on addresses"
level=info ts=2018-12-29T08:15:16.324655604Z caller=loki.go:121 msg=initialising module=ring
level=info ts=2018-12-29T08:15:16.324831791Z caller=loki.go:121 msg=initialising module=querier
level=error ts=2018-12-29T08:15:16.325277072Z caller=consul_client.go:182 msg="error getting path" key=ring err=null
level=info ts=2018-12-29T08:15:16.32549133Z caller=loki.go:121 msg=initialising module=ingester
level=info ts=2018-12-29T08:15:16.325638937Z caller=loki.go:121 msg=initialising module=distributor
level=info ts=2018-12-29T08:15:16.325698536Z caller=loki.go:121 msg=initialising module=all
level=info ts=2018-12-29T08:15:16.325726742Z caller=main.go:45 msg="Starting Loki" version="(version=master-29cbd06, branch=master, revision=29cbd06)"
level=info ts=2018-12-29T08:15:16.32577048Z caller=lifecycler.go:360 msg="entry not found in ring, adding with no tokens"
level=info ts=2018-12-29T08:15:16.326097187Z caller=lifecycler.go:290 msg="auto-joining cluster after timeout"
level=warn ts=2018-12-29T08:15:16.46305657Z caller=gokit.go:43 msg="POST /api/prom/push (500) 316.146碌s Response: \"empty ring\\n\" ws: false; Accept-Encoding: gzip; Content-Length: 3603; Content-Type: application/x-protobuf; User-Agent: Go-http-client/1.1; "
ts=2018-12-29T08:16:45.564717815Z caller=spanlogger.go:36 org_id=fake method=SeriesStore.Get level=error msg=lookupChunksBySeries err="invalid chunk time range value: 656432313632626636653a31363766616439306437393a31363766616439306437393a343931303839643700330066616b653a6431373839343a667544663954693452"
level=warn ts=2018-12-29T08:16:50.026345336Z caller=gokit.go:43 msg="POST /api/prom/push (500) 5.003725029s Response: \"rpc error: code = DeadlineExceeded desc = context deadline exceeded\\n\" ws: false; Accept-Encoding: gzip; Content-Length: 21234; Content-Type: application/x-protobuf; User-Agent: Go-http-client/1.1; "
fatal error: runtime: out of memory
invalid chunk time range seems the problem

Memory usage continues to grow
I believe this is fixed now (see #209)
I use the docker image which is created on 2/16, still have this problem.
I'm also having this problem. Using latest docker image.
It happens for specific queries constantly, no matter how small a time range.
For what it's worth, I'm not seeing any memory issues running grafana/loki:latest using these helm chart values:

In my case it is caused by specific queries in Grafana. If i'd want to debug it further, where should I start to lok at, in the code?
you can run loki and then use logcli to query.
I'm running into this as well. It seems that certain queries initiate the crash, but not always. It seems that the queries can sometimes be safely run a little while after a redeploy. And running the same query via logcli doesn't appear to trigger the OOM error, but running it via Grafana: Explore does.
I was just able to reproduce using logcli:
2019/05/10 09:21:03 Query failed: Get http://172.25.4.213:3100/api/prom/query?query=%7Bjob%3D%22mcServiceLogs%22%7D&limit=100&start=1557494449382939814&end=1557498049382939814&direction=BACKWARD®exp=: EOF
Snippet of kern.log in case that's useful - basically just a long list of loki invoking oom-killer until it finally gets killed itself:
May 10 14:48:19 ip-172-25-4-213 kernel: [4219389.089984] loki invoked oom-killer: gfp_mask=0x14200ca(GFP_HIGHUSER_MOVABLE), nodemask=(null), order=0, oom_score_adj=0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.089986] loki cpuset=/ mems_allowed=0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.089992] CPU: 3 PID: 23337 Comm: loki Not tainted 4.15.0-1021-aws #21-Ubuntu
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.089993] Hardware name: Amazon EC2 t3.xlarge/, BIOS 1.0 10/16/2017
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.089994] Call Trace:
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090002] dump_stack+0x63/0x8b
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090007] dump_header+0x71/0x285
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090009] oom_kill_process+0x220/0x440
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090011] out_of_memory+0x2d1/0x4f0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090013] __alloc_pages_slowpath+0xa90/0xe50
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090018] ? alloc_pages_current+0x6a/0xe0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090020] __alloc_pages_nodemask+0x263/0x280
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090022] alloc_pages_current+0x6a/0xe0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090024] __page_cache_alloc+0x81/0xa0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090026] filemap_fault+0x378/0x6f0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090028] ? filemap_map_pages+0x181/0x390
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090032] ext4_filemap_fault+0x31/0x44
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090036] __do_fault+0x24/0xf0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090037] handle_pte_fault+0x883/0xd30
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090041] ? __hrtimer_init+0xc0/0xc0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090043] __handle_mm_fault+0x478/0x5c0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090045] handle_mm_fault+0xb1/0x1f0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090049] __do_page_fault+0x250/0x4d0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090051] do_page_fault+0x2e/0xe0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090056] ? async_page_fault+0x2f/0x50
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090059] do_async_page_fault+0x51/0x80
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090061] async_page_fault+0x45/0x50
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090063] RIP: 0033:0x45d553
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090064] RSP: 002b:00007f7f9cff8d40 EFLAGS: 00010206
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090066] RAX: ffffffffffffff92 RBX: 00000000000186a0 RCX: 000000000045d553
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090067] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001bcecc8
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090068] RBP: 00007f7f9cff8d88 R08: 0000000000000000 R09: 0000000000000000
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090069] R10: 00007f7f9cff8d78 R11: 0000000000000206 R12: 00000000004a9d32
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090070] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090072] Mem-Info:
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090076] active_anon:3968517 inactive_anon:20 isolated_anon:0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090076] active_file:77 inactive_file:55 isolated_file:0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090076] unevictable:1352 dirty:0 writeback:0 unstable:0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090076] slab_reclaimable:11882 slab_unreclaimable:15781
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090076] mapped:1067 shmem:216 pagetables:9079 bounce:0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090076] free:33517 free_pcp:90 free_cma:0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090079] Node 0 active_anon:15874068kB inactive_anon:80kB active_file:308kB inactive_file:220kB unevictable:5408kB iso
lated(anon):0kB isolated(file):0kB mapped:4268kB dirty:0kB writeback:0kB shmem:864kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 2048kB writeback_tmp:0kB unstable:0
kB all_unreclaimable? no
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090080] Node 0 DMA free:15908kB min:64kB low:80kB high:96kB active_anon:0kB inactive_anon:0kB active_file:0kB inactiv
e_file:0kB unevictable:0kB writepending:0kB present:15992kB managed:15908kB mlocked:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma
:0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090084] lowmem_reserve[]: 0 2972 15815 15815 15815
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090088] Node 0 DMA32 free:63744kB min:12688kB low:15860kB high:19032kB active_anon:2993692kB inactive_anon:0kB active
_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:3129320kB managed:3063752kB mlocked:0kB kernel_stack:16kB pagetables:5952kB bounce:0kB free_pcp:0
kB local_pcp:0kB free_cma:0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090091] lowmem_reserve[]: 0 0 12843 12843 12843
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090095] Node 0 Normal free:54416kB min:54824kB low:68528kB high:82232kB active_anon:12879928kB inactive_anon:80kB act
ive_file:408kB inactive_file:108kB unevictable:5408kB writepending:0kB present:13438976kB managed:13155868kB mlocked:5408kB kernel_stack:5168kB pagetables:30364kB bou
nce:0kB free_pcp:360kB local_pcp:0kB free_cma:0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090099] lowmem_reserve[]: 0 0 0 0 0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090102] Node 0 DMA: 1*4kB (U) 0*8kB 0*16kB 1*32kB (U) 2*64kB (U) 1*128kB (U) 1*256kB (U) 0*512kB 1*1024kB (U) 1*2048k
B (M) 3*4096kB (M) = 15908kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090115] Node 0 DMA32: 11*4kB (UM) 6*8kB (UM) 5*16kB (UM) 4*32kB (UM) 4*64kB (UM) 2*128kB (UM) 1*256kB (U) 1*512kB (U)
1*1024kB (U) 0*2048kB 15*4096kB (ME) = 64044kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090129] Node 0 Normal: 639*4kB (UEH) 503*8kB (UEH) 492*16kB (UMEH) 457*32kB (UEH) 175*64kB (UEH) 60*128kB (UME) 12*25
6kB (UE) 8*512kB (UME) 0*1024kB 0*2048kB 0*4096kB = 55124kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090257] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090258] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090258] 1300 total pagecache pages
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090259] 0 pages in swap cache
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090260] Swap cache stats: add 0, delete 0, find 0/0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090261] Free swap = 0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090262] Total swap = 0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090262] 4146072 pages RAM
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090263] 0 pages HighMem/MovableOnly
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090263] 87190 pages reserved
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090264] 0 pages cma reserved
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090265] 0 pages hwpoisoned
ay 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090265] [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090276] [ 431] 0 431 26476 246 94208 0 0 lvmetad
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090279] [ 741] 103 741 12574 531 143360 0 -900 dbus-daemon
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090281] [ 748] 0 748 77203 427 102400 0 0 lxcfs
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090283] [ 752] 0 752 72495 816 188416 0 0 accounts-daemon
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090285] [ 759] 0 759 7937 611 102400 0 0 cron
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090287] [ 762] 0 762 17815 883 180224 0 0 systemd-logind
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090289] [ 799] 0 799 7083 507 102400 0 0 atd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090291] [ 863] 0 863 4103 440 81920 0 0 agetty
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090293] [ 868] 0 868 3722 376 69632 0 0 agetty
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090295] [ 1520] 1000 1520 19187 533 192512 0 0 systemd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090298] [ 1529] 1000 1529 64837 626 262144 0 0 (sd-pam)
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090301] [ 6384] 106 6384 7148 96 98304 0 0 uuidd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090303] [12531] 0 12531 27619 486 110592 0 0 irqbalance
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090305] [12572] 102 12572 66818 375 180224 0 0 rsyslogd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090307] [12969] 0 12969 6344 58 86016 0 0 iscsid
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090309] [12971] 0 12971 6470 1329 94208 0 -17 iscsid
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090311] [13213] 0 13213 42703 2527 229376 0 0 networkd-dispat
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090313] [15160] 0 15160 18074 738 180224 0 -1000 sshd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090316] [ 3876] 1000 3876 332278 2092 356352 0 0 promtail
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090318] [30142] 0 30142 250293 2876 253952 0 0 amazon-ssm-agen
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090320] [19625] 0 19625 350480 4264 352256 0 0 containerd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090322] [20734] 0 20734 350208 8580 499712 0 -500 dockerd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090324] [11539] 1000 11539 44206 4114 225280 0 0 consul
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090326] [19294] 0 19294 72219 375 217088 0 0 polkitd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090328] [ 5840] 0 5840 10746 776 114688 0 -1000 systemd-udevd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090330] [10353] 100 10353 20042 501 184320 0 0 systemd-network
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090332] [10384] 101 10384 17720 226 180224 0 0 systemd-resolve
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090335] [10425] 62583 10425 33893 337 167936 0 0 systemd-timesyn
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090337] [10450] 0 10450 35556 1290 270336 0 0 systemd-journal
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090339] [10755] 0 10755 306634 3696 303104 0 -900 snapd
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090341] [ 992] 0 992 2683 211 77824 0 -999 containerd-shim
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090343] [ 1013] 0 1013 37772 2629 180224 0 0 promtail
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090346] [23324] 1000 23324 4383314 3919808 31670272 0 0 loki
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090349] [ 410] 999 410 55614 9873 245760 0 1000 netdata
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090352] [ 461] 999 461 4627 554 81920 0 1000 bash
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090354] [ 481] 999 481 5318 843 94208 0 1000 apps.plugin
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090356] [ 485] 999 485 27918 330 81920 0 1000 go.d.plugin
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.090357] Out of memory: Kill process 410 (netdata) score 1002 or sacrifice child
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.098863] Killed process 481 (apps.plugin) total-vm:21272kB, anon-rss:1464kB, file-rss:1908kB, shmem-rss:0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.124577] oom_reaper: reaped process 481 (apps.plugin), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131868] loki invoked oom-killer: gfp_mask=0x14200ca(GFP_HIGHUSER_MOVABLE), nodemask=(null), order=0, oom_score_adj=0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131869] loki cpuset=/ mems_allowed=0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131875] CPU: 1 PID: 23325 Comm: loki Not tainted 4.15.0-1021-aws #21-Ubuntu
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131876] Hardware name: Amazon EC2 t3.xlarge/, BIOS 1.0 10/16/2017
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131877] Call Trace:
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131884] dump_stack+0x63/0x8b
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131889] dump_header+0x71/0x285
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131892] oom_kill_process+0x220/0x440
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131894] out_of_memory+0x2d1/0x4f0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131896] __alloc_pages_slowpath+0xa90/0xe50
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131900] ? alloc_pages_current+0x6a/0xe0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131905] __alloc_pages_nodemask+0x263/0x280
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131907] alloc_pages_current+0x6a/0xe0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131909] __page_cache_alloc+0x81/0xa0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131911] filemap_fault+0x378/0x6f0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131914] ? timerqueue_del+0x24/0x50
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131916] ? filemap_map_pages+0x22c/0x390
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131920] ext4_filemap_fault+0x31/0x44
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131924] __do_fault+0x24/0xf0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131926] handle_pte_fault+0x883/0xd30
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131927] __handle_mm_fault+0x478/0x5c0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131929] handle_mm_fault+0xb1/0x1f0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131933] __do_page_fault+0x250/0x4d0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131935] do_page_fault+0x2e/0xe0
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131938] ? async_page_fault+0x2f/0x50
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131942] do_async_page_fault+0x51/0x80
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131944] async_page_fault+0x45/0x50
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.131946] RIP: 0033:0x438993
.
.
.
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.557304] Out of memory: Kill process 23324 (loki) score 970 or sacrifice child
May 10 14:48:20 ip-172-25-4-213 kernel: [4219389.565438] Killed process 23324 (loki) total-vm:17533256kB, anon-rss:15724904kB, file-rss:76kB, shmem-rss:0kB
May 10 14:48:20 ip-172-25-4-213 kernel: [4219390.164804] oom_reaper: reaped process 23324 (loki), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
Any update on this? We're hitting this issue all the time
same here
same here
Loki (v0.1.0) is oom-killed here as well. As soon as I fire a simple query but with a bigger timerange (e.g. 3h or 6h), memory consumption grows in like 10secs from nearly nothing (<1mb) to some 23gb until oomkiller kills loki.
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
16787 10001 20 0 26.0g 23.1g 0 S 75.1 78.5 0:34.57 loki
I'm running into this as well. It seems that certain queries initiate the crash, but not always. It seems that the queries can sometimes be safely run a little while after a redeploy. And running the same query via logcli doesn't appear to trigger the OOM error, but running it via Grafana: Explore does.
same here
Fixed in 0.2
Much appreciated! Would have been useful for convincing the team to stick with Loki if we had some feedback that this was being worked, but that's okay.