Fluent-bit: Ingress nginx parser not applied

Created on 27 Aug 2019  ·  7 Comments  ·  Source: fluent/fluent-bit

Bug Report

Describe the bug
I want to parse nginx-ingress logs from Kubernetes using pod annotation fluentbit.io/parser: "k8s-nginx-ingress". Data is inserted in ElasticSearch but logs are not parsed.
In ES I see this:

{
  "_index": "kubernetes_cluster-2019.08.27",
  "_type": "flb_type",
  "_id": "WhNl0mwB0GmAmg-PSZTR",
  "_score": 1,
  "_source": {
    "@timestamp": "2019-08-27T09:25:42.979Z",
    "log": "10.244.0.1 - [10.244.0.1] - admin [27/Aug/2019:09:25:42 +0000] \"GET /kubernetes_cluster-2019.08.27/_search HTTP/2.0\" 200 1717 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36\" 49 0.012 [elk-elasticsearch-http] [] 10.244.0.203:9200 1717 0.016 200 8df3b3d29d019233c7ea9fbadbd0bf1e\n",
    "stream": "stdout",
    "time": "2019-08-27T09:25:42.97959453Z",
    "kubernetes": {
      "pod_name": "nginx-ingress-controller-xfqmp",
      "namespace_name": "default",
      "pod_id": "9860f847-1a12-4c40-a769-22737cb1cda5",
      "labels": {
        "app": "nginx-ingress",
        "component": "controller",
        "controller-revision-hash": "5dc6cd5c9b",
        "pod-template-generation": "1",
        "release": "nginx-ingress"
      },
      "annotations": {
        "fluentbit_io/exclude": "false",
        "fluentbit_io/parser": "k8s-nginx-ingress"
      },
      "host": "k8s-stage-ams3-worker-pool-r5nu",
      "container_name": "nginx-ingress-controller",
      "docker_id": "a7f466b8f59385e9ce21f898c5a4679ab911cd4543dc1c3ff5fa207c8f08e887"
    }
  },
  "fields": {
    "@timestamp": [
      "2019-08-27T09:25:42.979Z"
    ],
    "time": [
      "2019-08-27T09:25:42.979Z"
    ]
  }
}

Using Terraform to deploy it using a helm chart. I created a custom config map:

fluent-bit.conf
@INCLUDE fluent-bit-service.conf
@INCLUDE fluent-bit-input.conf
@INCLUDE fluent-bit-filter.conf
@INCLUDE fluent-bit-output.conf
fluent-bit-filter.conf
[FILTER]
    Name                kubernetes
    Match               kube.*
    Kube_URL            https://kubernetes.default.svc:443
    Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token
    K8S-Logging.Parser  On
    K8S-Logging.Exclude On
    Merge_Log_Trim On
    Merge_Log Off
fluent-bit-input.conf
[INPUT]
    Name             tail
    Path             /var/log/containers/nginx-ingress-controller-*.log
    Parser           docker
    Tag              kube.*
    Refresh_Interval 5
    Mem_Buf_Limit    10MB
    Skip_Long_Lines  On
fluent-bit-output.conf
[OUTPUT]
    Name  es
    Match *
    Host  ${es_host}
    Port  ${es_port}
    Logstash_Format On
    Retry_Limit False
    Type  flb_type
    Time_Key @timestamp
    Replace_Dots On
    Logstash_Prefix kubernetes_cluster

    HTTP_User ${es_user}
    HTTP_Passwd ${es_password}
fluent-bit-service.conf
[SERVICE]
    Flush        1
    Daemon       Off
    Log_Level    debug
    Parsers_File parsers.conf
parsers.conf

[PARSER]
    Name        k8s-nginx-ingress
    Format      regex
    Regex       ^(?<host>[^ ]*) - \[(?<real_ip>[^ ]*)\] - (?<user>[^ ]*) \[(?<time>[^\]]*)\] \\*"(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?\\*" (?<code>[^ ]*) (?<size>[^ ]*) \\*"(?<referer>[^\"]*)\\*" \\*"(?<agent>[^\"]*)\\*" (?<request_length>[^ ]*) (?<request_time>[^ ]*) \[(?<proxy_upstream_name>[^ ]*)\] \[(?<proxy_alternative_upstream_name>[^ ]*)\] (?<upstream_addr>[^ ]*) (?<upstream_response_length>[^ ]*) (?<upstream_response_time>[^ ]*) (?<upstream_status>[^ ]*) (?<reg_id>[^ ]*).*$
    Time_Key    time
    Time_Format %d/%b/%Y:%H:%M:%S %z

[PARSER]
    Name         docker
    Format       json
    Time_Key     time
    Time_Format  %Y-%m-%dT%H:%M:%S.%L
    Time_Keep    On
    # --
    # Since Fluent Bit v1.2, if you are parsing Docker logs and using
    # the Kubernetes filter, it's no longer required to decode the
    # 'log' key.
    #
    # Command      |  Decoder | Field | Optional Action
    # =============|==================|=================
    Decode_Field_As   escaped    log
    #Decode_Field_As    json     log

Here you can see the regexp is parsing ok

Fluent Bit logs seems to be ok:

Fluent Bit v1.2.2
Copyright (C) Treasure Data

[2019/08/27 09:25:40] [debug] [storage] [cio stream] new stream registered: tail.0
[2019/08/27 09:25:40] [ info] [storage] initializing...
[2019/08/27 09:25:40] [ info] [storage] in-memory
[2019/08/27 09:25:40] [ info] [storage] normal synchronization mode, checksum disabled, max_chunks_up=128
[2019/08/27 09:25:40] [ info] [engine] started (pid=1)
[2019/08/27 09:25:40] [debug] [engine] coroutine stack size: 24576 bytes (24.0K)
[2019/08/27 09:25:40] [debug] [in_tail] inotify watch fd=19
[2019/08/27 09:25:40] [debug] [in_tail] scanning path /var/log/containers/nginx-ingress-controller-*.log
[2019/08/27 09:25:40] [debug] [in_tail] add to scan queue /var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log, offset=0
[2019/08/27 09:25:40] [debug] [out_es] host=elasticsearch.elk port=9200 uri=/_bulk index=fluent-bit type=flb_type
[2019/08/27 09:25:40] [ info] [filter_kube] https=1 host=kubernetes.default.svc port=443
[2019/08/27 09:25:40] [ info] [filter_kube] local POD info OK
[2019/08/27 09:25:40] [ info] [filter_kube] testing connectivity with API server...
[2019/08/27 09:25:40] [debug] [filter_kube] API Server (ns=elk, pod=fluent-bit-rsq2s) http_do=0, HTTP Status: 200
[2019/08/27 09:25:40] [ info] [filter_kube] API server connectivity OK
[2019/08/27 09:25:40] [debug] [router] match rule tail.0:es.0
[2019/08/27 09:25:40] [ info] [sp] stream processor started
[2019/08/27 09:25:41] [debug] [filter_kube] API Server (ns=default, pod=nginx-ingress-controller-r5w2r) http_do=0, HTTP Status: 200
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32767 lines=70
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32337 lines=68
[2019/08/27 09:25:41] [debug] [task] created task=0x7ff84f8430c0 id=0 OK
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32579 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32589 lines=70
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32558 lines=73
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32614 lines=70
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32409 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32672 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32441 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32553 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32541 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32543 lines=70
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32755 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32758 lines=72
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32462 lines=71
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=32453 lines=110
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=23833 lines=75
[2019/08/27 09:25:41] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log promote to TAIL_EVENT
[2019/08/27 09:25:41] [debug] [out_es] HTTP Status=200 URI=/_bulk
[2019/08/27 09:25:41] [debug] [out_es Elasticsearch response
{"took":738,"errors":false,"items":[{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"FRNl0mwB0GmAmg-PLGGE","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"FhNl0mwB0GmAmg-PLGGE","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"FxNl0mwB0GmAmg-PLGGE","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"GBNl0mwB0GmAmg-PLGGE","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":1,"_primary_term":1,"status":201}},{"index":{"_index":"
[2019/08/27 09:25:41] [debug] [task] destroy task=0x7ff84f8430c0 (task_id=0)
[2019/08/27 09:25:41] [debug] [task] created task=0x7ff84f8430c0 id=0 OK
[2019/08/27 09:25:42] [debug] [out_es] HTTP Status=200 URI=/_bulk
[2019/08/27 09:25:42] [debug] [out_es Elasticsearch response
{"took":386,"errors":false,"items":[{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"nxNl0mwB0GmAmg-PLmEc","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":28,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"oBNl0mwB0GmAmg-PLmEc","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":37,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"oRNl0mwB0GmAmg-PLmEc","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":25,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"ohNl0mwB0GmAmg-PLmEc","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":21,"_primary_term":1,"status":201}},{"index":{"_inde
[2019/08/27 09:25:42] [debug] [task] destroy task=0x7ff84f8430c0 (task_id=0)
[2019/08/27 09:25:43] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log event
[2019/08/27 09:25:43] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=461 lines=1
[2019/08/27 09:25:43] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log event
[2019/08/27 09:25:43] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=402 lines=1
[2019/08/27 09:25:43] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log event
[2019/08/27 09:25:43] [debug] [in_tail] file=/var/log/containers/nginx-ingress-controller-r5w2r_default_nginx-ingress-controller-c43ebee4c3ef573106dde36b71b9e8911ff724112b76328e8965b2d2226160a4.log read=367 lines=1
[2019/08/27 09:25:43] [debug] [task] created task=0x7ff84f8430c0 id=0 OK
[2019/08/27 09:25:48] [debug] [out_es] HTTP Status=200 URI=/_bulk
[2019/08/27 09:25:48] [debug] [out_es Elasticsearch response
{"took":4718,"errors":false,"items":[{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"4NBl0mwBh79WkDYkNl2B","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":238,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"4dBl0mwBh79WkDYkNl2B","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":239,"_primary_term":1,"status":201}},{"index":{"_index":"kubernetes_cluster-2019.08.27","_type":"flb_type","_id":"4tBl0mwBh79WkDYkNl2B","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":240,"_primary_term":1,"status":201}}]}
[2019/08/27 09:25:48] [debug] [task] destroy task=0x7ff84f8430c0 (task_id=0)

Now, I don't understand why the log is not parsed, and there are no logs in Fluent Bit saying it will apply the specific parser, or any error about the regex not being valid.

Plus, how do I skip the logs that don't match the regexp?

Any ideas?

Expected behavior

I would like to see the log parsed as json with all those fields from regexp.

Your Environment

  • Version used: v1.2.2
  • Configuration: helm chart
  • Environment name and version (e.g. Kubernetes? What version?): Kubernetes on DO, 1.15.3
  • Server type and version:
  • Operating System and version: Debian
  • Filters and plugins: tail, kubernetes

Thx a lot for your help.

Most helpful comment

ingress-nginx has changed its log format and the k8s-ingress-nginx parser isn't valid. See #1450 for the correction.

I also struggled a bit with the fact that I'm using containerd and the cri parser rather than the docker parser. For those using the Docker runtime, the adjustment in #1450 should suffice. If you're using containerd, you may benefit from the following ...

It wasn't obvious how to chain the cri parser with a fluentbit.io/parser annotated parser, in part because the predefined cri parser labels the remainder of the containerd log line as message rather than (as I eventually learned is necessary) log.

After modifying the cri parser to label the unparsed content as log instead of message , I then ran up against the fact that with Merge_Log On the default parser (downstream of the cri parser) assumes the log value is JSON, which then spewed could not merge JSON log warnings. Fixing this required using Merge_Parser, which is recommended against in the documentation, but I couldn't see how else to resolve the problem.

Long story short, in the config excerpt below:

  • The tail input is used with the cri parser
  • The cri parser is like stock except that the message label has been renamed to log
  • I defined a catchall parser which is used when the fluentbit.io/parser annotation is missing, which simply re-tags the log content from the cri parser as message. Then I use Keep_Log Off to drop the original log tag.
  • My version of the k8s-nginx-ingress parser, unlike #1450, also preserves the original log as message.
    [INPUT]
        Name              tail
        Tag               kube.*
        Path              /var/log/containers/*.log
        Parser            cri
        DB                /var/log/flb_kube.db
        Mem_Buf_Limit     5MB
        Skip_Long_Lines   On
        Refresh_Interval  10

    [FILTER]
        Name                kubernetes
        Match               kube.*
        Kube_URL            https://kubernetes.default.svc:443
        Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token
        Kube_Tag_Prefix     kube.var.log.containers.
        Merge_Log           On
        Merge_Parser        catchall
        Keep_Log            Off
        K8S-Logging.Parser  On                                                                                                                                                                                                                                                                                                                                          
        K8S-Logging.Exclude On

    [PARSER]
        Name        k8s-nginx-ingress
        Format      regex
        Regex       ^(?<message>(?<remote>[^ ]*) - (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "(?<referer>[^\"]*)" "(?<agent>[^\"]*)" (?<request_length>[^ ]*) (?<request_time>[^ ]*) \[(?<proxy_upstream_name>[^ ]*)\] \[(?<proxy_alternative_upstream_name>[^ ]*)\] (?<upstream_addr>[^ ]*) (?<upstream_response_length>[^ ]*) (?<upstream_response_time>[^ ]*) (?<upstream_status>[^ ]*) (?<req_id>[^ ]*).*)$
        Time_Key    time
        Time_Format %d/%b/%Y:%H:%M:%S %z

    [PARSER]
        # http://rubular.com/r/tjUt3Awgg4
        Name        cri
        Format      regex
        # XXX: modified from upstream: s/message/log/
        Regex       ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<log>.*)$
        Time_Key    time
        Time_Format %Y-%m-%dT%H:%M:%S.%L%z

    [PARSER]
        Name    catchall
        Format  regex
        Regex   ^(?<message>.*)$

All 7 comments

I've the same behavior from a different custom parser. One custom parser for stderr works. A different custom parser for stdout quietly doesn't do anything at all. Logs even at debug are completely silent on the problem.

thx @mwpeterson , wandering what can be the cause. There is a TRACE option when we build the fluent-bit, maybe that log level can tell us more about what's going on. But strange that in debug mode there is nothing at all.

@mwpeterson, can you share your parser that works?

After a good night's sleep my "broken" parser is sorta working too… once I found and fixed the overgreedy grep exclude pattern that was unexpectedly matching the lines I wasn't seeing.

But the "broken" parser's regex is still buggy.

[PARSER]
    Name         seelog
    Format       regex
    Regex        ^\d+ \[(?<level>[^\]]+)\] (?<log>.*)

In Fluentular and Rubular it parses any of the seelog formated lines I give it.

1567010063196721148 [Info] Add/Update for Pod kube-state-metrics-7476496db-7sqxr on my node, namespace = monitoring, IP = \n
1567010063502764285 [Info] Pods deleted on my node: kube-system/cni-metrics-helper-7889dc4cf8-bkmhf\n
1567010089962180606 [Info] GetCNIPods start...\n
1567010089962204793 [Info] GetCNIPods discovered[aws-node-v9m6k aws-node-7sfmw aws-node-f5whn aws-node-wrrfj aws-node-zcvlg aws-node-5kpbm aws-node-92zct aws-node-d4xc9 aws-node-mr56x aws-node-glk2l aws-node-kffm6 aws-node-wx5js aws-node-fnnbp aws-node-qkwlv]\n

but the parser itself isn't parsing the two GetCNIPods lines.

The container is https://github.com/aws/amazon-vpc-cni-k8s/tree/master/cni-metrics-helper
It logs glog on stderr and seelog on stdout. seelog on stdout is what is giving me problems.
The working glog parser:

[PARSER]
    Name         glog
    Format       regex
    Regex        ^(?<level>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<log>.*)
    Time_Key     time
    Time_Format  %m%d %H:%M:%S.%L
    Time_Keep    Off

And the container annotations:

      annotations:
        fluentbit.io/parser_stderr: "glog"
        fluentbit.io/parser_stdout: "seelog"

:cry:

```

@botzill the tl;dr: set Merge_Log On to your kubernetes filter block :smile:

I gave up on my problem and tried the k8s-nginx-ingress parser, since it happened to be the next log on my todo list.

I added fluentbit.io/parser: "k8s-nginx-ingress" to the annotations and the logs are being processed as I expect, with k8s-nginx-ingress handling the stdout stream, and glog catching the stderr stream.

Notable differences between our kubernetes filter blocks are

    Merge_Log           On
    Merge_Parser        glog

and the docs say it's mandatory that Merge_Log be On to process the log value via annotations https://docs.fluentbit.io/manual/filter/kubernetes#processing-the-log-value

The Merge_Parser glog bit wouldn't have any impact on your k8s-nginx-ingress processing. I included it to illustrate that the seelog processing that _isn't_ working for me _should_ be working for me if I bang the right rocks together in the correct order. :joy: I put it in to automagically process glog lines since so much of kube-system uses that format, and in my cluster I can't add annotations because of kops.

{
  "log": "166.145.249.129 - [166.145.249.129] - - [28/Aug/2019:21:16:29 +0000] \"POST /job/Code%20Pipeline/ajaxExecutors HTTP/1.1\" 200 965 \"https://jenkins.domain.com/job/Code%20Pipeline/\" \"Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0\" 1807 0.032 [domain-jenkins-8080] [] 10.60.122.104:8080 965 0.032 200 d91740ac44a2070eb2a1280323efb285\n",
  "stream": "stdout",
  "time": "2019-08-28T21:16:29.252415898Z",
  "host": "166.145.249.129",
  "real_ip": "166.145.249.129",
  "user": "-",
  "method": "POST",
  "path": "/job/Code%20Pipeline/ajaxExecutors",
  "code": "200",
  "size": "965",
  "referer": "https://jenkins.domain.com/job/Code%20Pipeline/",
  "agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
  "request_length": "1807",
  "request_time": "0.032",
  "proxy_upstream_name": "domain-jenkins-8080",
  "upstream_addr": "[]",
  "upstream_response_length": "10.60.122.104:8080",
  "upstream_response_time": "965",
  "upstream_status": "0.032",
  "last": "200 d91740ac44a2070eb2a1280323efb285",
  "kubernetes": {
    "pod_name": "nginx-ingress-controller-7b657f6cb8-rxw2h",
    "namespace_name": "ingress-nginx",
    "pod_id": "99b7017d-7b7c-4723-ace2-3a4ba0c26a8d",
    "labels": {
      "app.kubernetes.io/name": "ingress-nginx",
      "app.kubernetes.io/part-of": "ingress-nginx",
      "pod-template-hash": "7b657f6cb8"
    },
    "annotations": {
      "fluentbit.io/parser": "k8s-nginx-ingress",
      "prometheus.io/port": "10254",
      "prometheus.io/scrape": "true"
    },
    "host": "10.153.254.221",
    "container_name": "nginx-ingress-controller",
    "docker_id": "48df04c33f80a4163efed7e6b396d6cec1088de24aedde4dbd10980fba494c5d"
  }
}
{
  "log": "healthcheck error: Get http+unix://nginx-status/healthz: dial unix /tmp/nginx-status-server.sock: connect: connection refused",
  "stream": "stderr",
  "time": "2019-08-28T21:16:34.365330201Z",
  "severity": "E",
  "pid": "8",
  "source": "checker.go:41",
  "kubernetes": {
    "pod_name": "nginx-ingress-controller-79f6884cf6-mvxvr",
    "namespace_name": "ingress-nginx",
    "pod_id": "46512e69-db0c-4932-967d-318cd75de0ad",
    "labels": {
      "app.kubernetes.io/name": "ingress-nginx",
      "app.kubernetes.io/part-of": "ingress-nginx",
      "pod-template-hash": "79f6884cf6"
    },
    "annotations": {
      "prometheus.io/port": "10254",
      "prometheus.io/scrape": "true"
    },
    "host": "10.198.49.99",
    "container_name": "nginx-ingress-controller",
    "docker_id": "5d97b1f8320eb36348d06e2265008bb023dae980c79358aa8335639a228baca0"
  }
}
fluent-bit.conf 

[SERVICE]
    Flush        1
    Daemon       Off
    Log_Level    info
    Parsers_File parsers.conf
    Parsers_File parsers_custom.conf
    HTTP_Server  On
    HTTP_Listen  0.0.0.0
    HTTP_Port    2020

[INPUT]
    Name             tail
    Path             /var/log/containers/*.log
    Exclude_path     *fluent-bit-daemon*,*fluentd*
    Parser           docker
    Tag              kubernetes.*
    Refresh_Interval 5
    Mem_Buf_Limit    5MB
    Skip_Long_Lines  On
    DB               /tail-db/tail-containers-state.db
    DB.Sync          Normal

[FILTER]
    Name                kubernetes
    Match               kubernetes.*
    Kube_Tag_Prefix     kubernetes.var.log.containers.
    Kube_URL            https://kubernetes.default.svc:443
    Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token
    Merge_Parser        glog
    Merge_Log           On
    Merge_Log_Trim      On
    K8S-Logging.Parser  On
    K8S-Logging.Exclude On

[OUTPUT]
    Name          forward
    Match         *
    Host          fluentd.logging.svc
    Port          24240

    Retry_Limit   False



md5-c73511ef519a2728e7577ce835113bc5



parsers.conf 
[PARSER]
    Name        k8s-nginx-ingress
    Format      regex
    Regex       ^(?<host>[^ ]*) - \[(?<real_ip>[^ ]*)\] - (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "(?<referer>[^\"]*)" "(?<agent>[^\"]*)" (?<request_length>[^ ]*) (?<request_time>[^ ]*) \[(?<proxy_upstream_name>[^ ]*)\] (?<upstream_addr>[^ ]*) (?<upstream_response_length>[^ ]*) (?<upstream_response_time>[^ ]*) (?<upstream_status>[^ ]*) (?<last>[^$]*)
    Time_Key    time
    Time_Format %d/%b/%Y:%H:%M:%S %z

[PARSER]
    Name         docker
    Format       json
    Time_Key     time
    Time_Format  %Y-%m-%dT%H:%M:%S.%L
    Time_Keep    On
    # --
    # Since Fluent Bit v1.2, if you are parsing Docker logs and using
    # the Kubernetes filter, it's no longer required to decode the
    # 'log' key.
    #
    # Command      |  Decoder | Field | Optional Action
    # =============|==================|=================
    #Decode_Field_As    json     log



md5-c73511ef519a2728e7577ce835113bc5



parsers_custom.conf 
[PARSER]
    Name         glog
    Format       regex
    Regex        ^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<log>.*)
    Time_Key     time
    Time_Format  %m%d %H:%M:%S.%L
    Time_Keep    Off

This doesn't work. Was someone able to get this parser working? I would love to see a detailed guide on how to parse the ingress logs.

ingress-nginx has changed its log format and the k8s-ingress-nginx parser isn't valid. See #1450 for the correction.

I also struggled a bit with the fact that I'm using containerd and the cri parser rather than the docker parser. For those using the Docker runtime, the adjustment in #1450 should suffice. If you're using containerd, you may benefit from the following ...

It wasn't obvious how to chain the cri parser with a fluentbit.io/parser annotated parser, in part because the predefined cri parser labels the remainder of the containerd log line as message rather than (as I eventually learned is necessary) log.

After modifying the cri parser to label the unparsed content as log instead of message , I then ran up against the fact that with Merge_Log On the default parser (downstream of the cri parser) assumes the log value is JSON, which then spewed could not merge JSON log warnings. Fixing this required using Merge_Parser, which is recommended against in the documentation, but I couldn't see how else to resolve the problem.

Long story short, in the config excerpt below:

  • The tail input is used with the cri parser
  • The cri parser is like stock except that the message label has been renamed to log
  • I defined a catchall parser which is used when the fluentbit.io/parser annotation is missing, which simply re-tags the log content from the cri parser as message. Then I use Keep_Log Off to drop the original log tag.
  • My version of the k8s-nginx-ingress parser, unlike #1450, also preserves the original log as message.
    [INPUT]
        Name              tail
        Tag               kube.*
        Path              /var/log/containers/*.log
        Parser            cri
        DB                /var/log/flb_kube.db
        Mem_Buf_Limit     5MB
        Skip_Long_Lines   On
        Refresh_Interval  10

    [FILTER]
        Name                kubernetes
        Match               kube.*
        Kube_URL            https://kubernetes.default.svc:443
        Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token
        Kube_Tag_Prefix     kube.var.log.containers.
        Merge_Log           On
        Merge_Parser        catchall
        Keep_Log            Off
        K8S-Logging.Parser  On                                                                                                                                                                                                                                                                                                                                          
        K8S-Logging.Exclude On

    [PARSER]
        Name        k8s-nginx-ingress
        Format      regex
        Regex       ^(?<message>(?<remote>[^ ]*) - (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "(?<referer>[^\"]*)" "(?<agent>[^\"]*)" (?<request_length>[^ ]*) (?<request_time>[^ ]*) \[(?<proxy_upstream_name>[^ ]*)\] \[(?<proxy_alternative_upstream_name>[^ ]*)\] (?<upstream_addr>[^ ]*) (?<upstream_response_length>[^ ]*) (?<upstream_response_time>[^ ]*) (?<upstream_status>[^ ]*) (?<req_id>[^ ]*).*)$
        Time_Key    time
        Time_Format %d/%b/%Y:%H:%M:%S %z

    [PARSER]
        # http://rubular.com/r/tjUt3Awgg4
        Name        cri
        Format      regex
        # XXX: modified from upstream: s/message/log/
        Regex       ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<log>.*)$
        Time_Key    time
        Time_Format %Y-%m-%dT%H:%M:%S.%L%z

    [PARSER]
        Name    catchall
        Format  regex
        Regex   ^(?<message>.*)$
Was this page helpful?
0 / 5 - 0 ratings