> # # Read metrics from VMware vCenter
> [[inputs.vsphere]]
> ## List of vCenter URLs to be monitored. These three lines must be uncommented
> ## and edited for the plugin to work.
> vcenters = [ "https://myvcenter1/sdk", "https://myvcenter2/sdk", "https://myvcenter3/sdk"]
> username = "myusernamer"
> password = "mypassword"
>
> interval = "20s"
>
> ## VMs
> ## Typical VM metrics (if omitted or empty, all metrics are collected)
> vm_metric_include = [
> "cpu.ready.summation",
> "cpu.costop.summation",
> "cpu.usagemhz.average",
> "cpu.usage.average",
> "mem.active.average",
> "mem.usage.average",
> "net.received.average",
> "net.transmitted.average",
> "net.usage.average",
> "net.packetsRx.summation",
> "net.packetsTx.summation",
> "virtualDisk.write.average",
> "virtualDisk.read.average",
> "virtualDisk.numberWriteAveraged.average",
> "virtualDisk.numberReadAveraged.average",
> "virtualDisk.totalWriteLatency.average",
> "virtualDisk.totalReadLatency.average",
> "disk.maxTotalLatency.latest",
> "disk.usage.average",
> "disk.read.average",
> "disk.write.average",
> "disk.commandsAveraged.average",
> ]
> # vm_metric_exclude = [] ## Nothing is excluded by default
> # vm_instances = true ## true by default
>
> ## Hosts
> ## Typical host metrics (if omitted or empty, all metrics are collected)
> host_metric_include = [
> "cpu.coreUtilization.average",
> "cpu.costop.summation",
> "cpu.demand.average",
> "cpu.idle.summation",
> "cpu.latency.average",
> "cpu.readiness.average",
> "cpu.ready.summation",
> "cpu.swapwait.summation",
> "cpu.usage.average",
> "cpu.usagemhz.average",
> "cpu.used.summation",
> "cpu.utilization.average",
> "cpu.wait.summation",
> "disk.deviceReadLatency.average",
> "disk.deviceWriteLatency.average",
> "disk.kernelReadLatency.average",
> "disk.kernelWriteLatency.average",
> "disk.numberReadAveraged.average",
> "disk.numberWriteAveraged.average",
> "disk.read.average",
> "disk.totalReadLatency.average",
> "disk.totalWriteLatency.average",
> "disk.write.average",
> "mem.active.average",
> "mem.latency.average",
> "mem.state.latest",
> "mem.swapin.average",
> "mem.swapinRate.average",
> "mem.swapout.average",
> "mem.swapoutRate.average",
> "mem.totalCapacity.average",
> "mem.usage.average",
> "mem.vmmemctl.average",
> "net.bytesRx.average",
> "net.bytesTx.average",
> "net.droppedRx.summation",
> "net.droppedTx.summation",
> "net.errorsRx.summation",
> "net.errorsTx.summation",
> "net.usage.average",
> "storageAdapter.numberReadAveraged.average",
> "storageAdapter.numberWriteAveraged.average",
> "storageAdapter.read.average",
> "storageAdapter.write.average",
> "sys.uptime.latest",
> ]
> # host_metric_exclude = [] ## Nothing excluded by default
> # host_instances = true ## true by default
>
> ## Clusters
> # cluster_metric_include = [] ## if omitted or empty, all metrics are collected
> # cluster_metric_exclude = [] ## Nothing excluded by default
> # cluster_instances = true ## true by default
>
> ## Datastores
> # datastore_metric_include = [
> # "datastore.numberReadAveraged.average",
> # "datastore.numberWriteAveraged.average",
> # "datastore.throughput.contention.average",
> # "datastore.throughput.usage.average",
> # "datastore.write.average",
> # "datastore.read.average",
> # "disk.used.latest",
> # "disk.provisioned.latest",
> # "disk.capacity.latest",
> # "disk.capacity.contention.average",
> # "disk.capacity.provisioned.average",
> # "disk.capacity.usage.average",
> # ] ## if omitted or empty, all metrics are collected
> # datastore_metric_exclude = [] ## Nothing excluded by default
> # datastore_instances = false ## false by default for Datastores only
>
> ## Datacenters
> datacenter_metric_include = [] ## if omitted or empty, all metrics are collected
> datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
> # datacenter_instances = false ## false by default
>
> ## Plugin Settings
> ## separator character to use for measurement and field names (default: "_")
> # separator = "_"
>
> ## number of objects to retrieve per query for realtime resources (vms and hosts)
> ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
> max_query_objects = 32
>
> ## number of metrics to retrieve per query for non-realtime resources (clusters and datastores)
> ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
> max_query_metrics = 32
>
> ## number of goroutines to use for collection and discovery of objects and metrics
> # collect_concurrency = 1
> # discover_concurrency = 1
>
> ## whether or not to force discovery of new objects on initial gather call before collecting metrics
> ## when true for large environments this may cause errors for time elapsed while collecting metrics
> ## when false (default) the first collection cycle may result in no or limited metrics while objects are discovered
> # force_discover_on_init = false
>
> ## the interval before (re)discovering objects subject to metrics collection (default: 300s)
> # object_discovery_interval = "300s"
>
> ## timeout applies to any API request made to vCenter
> timeout = "10s"
>
> ## Optional SSL Config
> # ssl_ca = "/path/to/cafile"
> # ssl_cert = "/path/to/certfile"
> # ssl_key = "/path/to/keyfile"
> ## Use SSL but skip chain & host verification
> insecure_skip_verify = true
Latest Telegraf version (1.9.0)
CentOS 7.5
vSphere 6.0 update 3
No error
2018-11-26T14:56:21Z E! [inputs.vsphere]: Error in plugin: ServerFaultCode: This operation is restricted by the administrator - 'vpxd.stats.maxQueryMetrics'. Contact your system administrator.
Account used to access vSphere is read-only.
In the previous versions of Telegraf, no error.
Are you using the cluster metrics? A quick fix is to turn off cluster metrics. There seems to be a bug in some versions of vCenter restricting cluster queries that shouldn't be restricted.
Try to add this:
cluster_metric_exclude = ["*"]
Indeed, this fixes the issue.
But I plan to use the cluster stats at some point.
You can synthesize most of them using aggregating queries across hosts based on the "clustername" tag.
That being said, we are actively looking at options for fixing this.
To help us debug this, could you tell me what version and build you have of vCenter?
vCenter Server 6.0 U3h - build 9313458
Same here. Running with telegraf 1.10.4-1 (from yum repository) and vCenter 6.7.0 Build 13007421 ...
This occurs for me with vCenter 6.7 U3 and telegraf 1.12.2 on RHEL7
Are you collecting a lot of cluster metrics? There's a bug in vCenter that causes issues when querying cluster metrics. In essence, it over-counts the number of objects queried and refuses the query due to perceived query complexity. The only way to get around this seems to be to increase vpxd.stats.maxQueryMetrics to a much higher value, or to set it to -1, meaning that query sizes are unrestricted.
Depending on the time series database you're using, you could work around this by simply not querying the cluster metrics, but synthesizing them from host metrics. This also has the advantage that you can use metrics collected at a 20s interval, rather than the minimum 5m for cluster metrics. I think most advanced TSDBs have aggregating queries that can e.g. present the sum of CPU utilizations for all hosts in a cluster. In fact, that's how the cluster dashboards in Wavefront are built and I'm pretty sure InfluxDB allows you to do the same. Just something to consider.
Most helpful comment
Same here. Running with telegraf 1.10.4-1 (from yum repository) and vCenter 6.7.0 Build 13007421 ...