diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 88d32b1c5..d99d1dc38 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,5 +1,6 @@ SUBDIRS = zfs zpool zdb zhack zinject zstream zstreamdump ztest SUBDIRS += fsck_zfs vdev_id raidz_test zfs_ids_to_path +SUBDIRS += zpool_influxdb if USING_PYTHON SUBDIRS += arcstat arc_summary dbufstat diff --git a/cmd/zpool_influxdb/Makefile.am b/cmd/zpool_influxdb/Makefile.am new file mode 100644 index 000000000..d87afc48f --- /dev/null +++ b/cmd/zpool_influxdb/Makefile.am @@ -0,0 +1,11 @@ +include $(top_srcdir)/config/Rules.am + +bin_PROGRAMS = zpool_influxdb + +zpool_influxdb_SOURCES = \ + zpool_influxdb.c + +zpool_influxdb_LDADD = \ + $(top_builddir)/lib/libspl/libspl.la \ + $(top_builddir)/lib/libnvpair/libnvpair.la \ + $(top_builddir)/lib/libzfs/libzfs.la diff --git a/cmd/zpool_influxdb/README.md b/cmd/zpool_influxdb/README.md new file mode 100644 index 000000000..7d6c253d7 --- /dev/null +++ b/cmd/zpool_influxdb/README.md @@ -0,0 +1,294 @@ +# Influxdb Metrics for ZFS Pools +The _zpool_influxdb_ program produces +[influxdb](https://github.com/influxdata/influxdb) line protocol +compatible metrics from zpools. In the UNIX tradition, _zpool_influxdb_ +does one thing: read statistics from a pool and print them to +stdout. In many ways, this is a metrics-friendly output of +statistics normally observed via the `zpool` command. + +## Usage +When run without arguments, _zpool_influxdb_ runs once, reading data +from all imported pools, and prints to stdout. +```shell +zpool_influxdb [options] [poolname] +``` +If no poolname is specified, then all pools are sampled. + +| option | short option | description | +|---|---|---| +| --execd | -e | For use with telegraf's `execd` plugin. When [enter] is pressed, the pools are sampled. To exit, use [ctrl+D] | +| --no-histogram | -n | Do not print histogram information | +| --signed-int | -i | Use signed integer data type (default=unsigned) | +| --sum-histogram-buckets | -s | Sum histogram bucket values | +| --tags key=value[,key=value...] | -t | Add tags to data points. No tag sanity checking is performed. | +| --help | -h | Print a short usage message | + +#### Histogram Bucket Values +The histogram data collected by ZFS is stored as independent bucket values. +This works well out-of-the-box with an influxdb data source and grafana's +heatmap visualization. The influxdb query for a grafana heatmap +visualization looks like: +``` +field(disk_read) last() non_negative_derivative(1s) +``` + +Another method for storing histogram data sums the values for lower-value +buckets. For example, a latency bucket tagged "le=10" includes the values +in the bucket "le=1". +This method is often used for prometheus histograms. +The `zpool_influxdb --sum-histogram-buckets` option presents the data from ZFS +as summed values. + +## Measurements +The following measurements are collected: + +| measurement | description | zpool equivalent | +|---|---|---| +| zpool_stats | general size and data | zpool list | +| zpool_scan_stats | scrub, rebuild, and resilver statistics (omitted if no scan has been requested) | zpool status | +| zpool_vdev_stats | per-vdev statistics | zpool iostat -q | +| zpool_io_size | per-vdev I/O size histogram | zpool iostat -r | +| zpool_latency | per-vdev I/O latency histogram | zpool iostat -w | +| zpool_vdev_queue | per-vdev instantaneous queue depth | zpool iostat -q | + +### zpool_stats Description +zpool_stats contains top-level summary statistics for the pool. 
+Performance counters measure the I/Os to the pool's devices.
+
+#### zpool_stats Tags
+
+| label | description |
+|---|---|
+| name | pool name |
+| path | for leaf vdevs, the pathname |
+| state | pool state, as shown by _zpool status_ |
+| vdev | vdev name (root = entire pool) |
+
+#### zpool_stats Fields
+
+| field | units | description |
+|---|---|---|
+| alloc | bytes | allocated space |
+| free | bytes | unallocated space |
+| size | bytes | total pool size |
+| read_bytes | bytes | bytes read since pool import |
+| read_errors | count | number of read errors |
+| read_ops | count | number of read operations |
+| write_bytes | bytes | bytes written since pool import |
+| write_errors | count | number of write errors |
+| write_ops | count | number of write operations |
+
+### zpool_scan_stats Description
+Once a pool has been scrubbed, resilvered, or rebuilt, the zpool_scan_stats
+contain information about the status and performance of the operation.
+Otherwise, the zpool_scan_stats do not exist in the kernel, and therefore
+cannot be reported by this collector.
+
+#### zpool_scan_stats Tags
+
+| label | description |
+|---|---|
+| name | pool name |
+| function | name of the scan function running or recently completed |
+| state | scan state, as shown by _zpool status_ |
+
+#### zpool_scan_stats Fields
+
+| field | units | description |
+|---|---|---|
+| errors | count | number of errors encountered by scan |
+| examined | bytes | total data examined during scan |
+| to_examine | bytes | prediction of total bytes to be scanned |
+| pass_examined | bytes | data examined during current scan pass |
+| issued | bytes | size of I/Os issued to disks |
+| pass_issued | bytes | size of I/Os issued to disks for current pass |
+| processed | bytes | data reconstructed during scan |
+| to_process | bytes | total bytes to be repaired |
+| rate | bytes/sec | examination rate |
+| start_ts | epoch timestamp | start timestamp for scan |
+| pause_ts | epoch timestamp | timestamp for a scan pause request |
+| end_ts | epoch timestamp | completion timestamp for scan |
+| paused_t | seconds | elapsed time while paused |
+| remaining_t | seconds | estimate of time remaining for scan |
+
+### zpool_vdev_stats Description
+The ZFS I/O (ZIO) scheduler uses five queues to schedule I/Os to each vdev.
+These queues are further divided into active and pending states.
+An I/O is pending prior to being issued to the vdev. An active
+I/O has been issued to the vdev. The scheduler and its tunable
+parameters are described in the
+[ZFS documentation for ZIO Scheduler](https://openzfs.github.io/openzfs-docs/Performance%20and%20Tuning/ZIO%20Scheduler.html).
+The ZIO scheduler reports the queue depths as gauges where the value
+represents an instantaneous snapshot of the queue depth at
+the sample time. Therefore, it is not unusual to see all zeroes
+for an idle pool.
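+
+For reference, a single zpool_vdev_stats data point in influxdb line
+protocol might look like the following sketch, using the tags and fields
+described below (the pool name `tank`, the field values, and the
+timestamp are illustrative, not actual output):
+```
+zpool_vdev_stats,name=tank,vdev=root sync_r_active_queue=0u,sync_w_active_queue=2u,async_r_active_queue=0u,async_w_active_queue=7u,async_scrub_active_queue=0u,sync_r_pend_queue=0u,sync_w_pend_queue=0u,async_r_pend_queue=0u,async_w_pend_queue=5u,async_scrub_pend_queue=0u 1590445168000000000
+```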
+ +#### zpool_vdev_stats Tags +| label | description | +|---|---| +| name | pool name | +| vdev | vdev name (root = entire pool) | + +#### zpool_vdev_stats Fields +| field | units | description | +|---|---|---| +| sync_r_active_queue | entries | synchronous read active queue depth | +| sync_w_active_queue | entries | synchronous write active queue depth | +| async_r_active_queue | entries | asynchronous read active queue depth | +| async_w_active_queue | entries | asynchronous write active queue depth | +| async_scrub_active_queue | entries | asynchronous scrub active queue depth | +| sync_r_pend_queue | entries | synchronous read pending queue depth | +| sync_w_pend_queue | entries | synchronous write pending queue depth | +| async_r_pend_queue | entries | asynchronous read pending queue depth | +| async_w_pend_queue | entries | asynchronous write pending queue depth | +| async_scrub_pend_queue | entries | asynchronous scrub pending queue depth | + +### zpool_latency Histogram +ZFS tracks the latency of each I/O in the ZIO pipeline. This latency can +be useful for observing latency-related issues that are not easily observed +using the averaged latency statistics. + +The histogram fields show cumulative values from lowest to highest. +The largest bucket is tagged "le=+Inf", representing the total count +of I/Os by type and vdev. + +#### zpool_latency Histogram Tags +| label | description | +|---|---| +| le | bucket for histogram, latency is less than or equal to bucket value in seconds | +| name | pool name | +| path | for leaf vdevs, the device path name, otherwise omitted | +| vdev | vdev name (root = entire pool) | + +#### zpool_latency Histogram Fields +| field | units | description | +|---|---|---| +| total_read | operations | read operations of all types | +| total_write | operations | write operations of all types | +| disk_read | operations | disk read operations | +| disk_write | operations | disk write operations | +| sync_read | operations | ZIO sync reads | +| sync_write | operations | ZIO sync writes | +| async_read | operations | ZIO async reads| +| async_write | operations | ZIO async writes | +| scrub | operations | ZIO scrub/scan reads | +| trim | operations | ZIO trim (aka unmap) writes | + +### zpool_io_size Histogram +ZFS tracks I/O throughout the ZIO pipeline. The size of each I/O is used +to create a histogram of the size by I/O type and vdev. For example, a +4KiB write to mirrored pool will show a 4KiB write to the top-level vdev +(root) and a 4KiB write to each of the mirror leaf vdevs. + +The ZIO pipeline can aggregate I/O operations. For example, a contiguous +series of writes can be aggregated into a single, larger I/O to the leaf +vdev. The independent I/O operations reflect the logical operations and +the aggregated I/O operations reflect the physical operations. + +The histogram fields show cumulative values from lowest to highest. +The largest bucket is tagged "le=+Inf", representing the total count +of I/Os by type and vdev. + +Note: trim I/Os can be larger than 16MiB, but the larger sizes are +accounted in the 16MiB bucket. 
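+
+As a sketch, one bucket of the zpool_io_size histogram for the top-level
+vdev might be emitted as the line below (the pool name `tank`, the field
+values, and the timestamp are illustrative; the largest bucket is tagged
+`le=+Inf` instead of a byte value):
+```
+zpool_io_size,le=4096,name=tank,vdev=root sync_read_ind=12u,sync_write_ind=4u,async_read_ind=0u,async_write_ind=88u,scrub_read_ind=0u,sync_read_agg=2u,sync_write_agg=0u,async_read_agg=0u,async_write_agg=31u,scrub_read_agg=0u,trim_write_ind=0u,trim_write_agg=0u 1590445168000000000
+```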
+ +#### zpool_io_size Histogram Tags +| label | description | +|---|---| +| le | bucket for histogram, I/O size is less than or equal to bucket value in bytes | +| name | pool name | +| path | for leaf vdevs, the device path name, otherwise omitted | +| vdev | vdev name (root = entire pool) | + +#### zpool_io_size Histogram Fields +| field | units | description | +|---|---|---| +| sync_read_ind | blocks | independent sync reads | +| sync_write_ind | blocks | independent sync writes | +| async_read_ind | blocks | independent async reads | +| async_write_ind | blocks | independent async writes | +| scrub_read_ind | blocks | independent scrub/scan reads | +| trim_write_ind | blocks | independent trim (aka unmap) writes | +| sync_read_agg | blocks | aggregated sync reads | +| sync_write_agg | blocks | aggregated sync writes | +| async_read_agg | blocks | aggregated async reads | +| async_write_agg | blocks | aggregated async writes | +| scrub_read_agg | blocks | aggregated scrub/scan reads | +| trim_write_agg | blocks | aggregated trim (aka unmap) writes | + +#### About unsigned integers +Telegraf v1.6.2 and later support unsigned 64-bit integers which more +closely matches the uint64_t values used by ZFS. By default, zpool_influxdb +uses ZFS' uint64_t values and influxdb line protocol unsigned integer type. +If you are using old telegraf or influxdb where unsigned integers are not +available, use the `--signed-int` option. + +## Using _zpool_influxdb_ + +The simplest method is to use the execd input agent in telegraf. For older +versions of telegraf which lack execd, the exec input agent can be used. +For convenience, one of the sample config files below can be placed in the +telegraf config-directory (often /etc/telegraf/telegraf.d). Telegraf can +be restarted to read the config-directory files. + +### Example telegraf execd configuration +```toml +# # Read metrics from zpool_influxdb +[[inputs.execd]] +# ## default installation location for zpool_influxdb command + command = ["/usr/bin/zpool_influxdb", "--execd"] + + ## Define how the process is signaled on each collection interval. + ## Valid values are: + ## "none" : Do not signal anything. (Recommended for service inputs) + ## The process must output metrics by itself. + ## "STDIN" : Send a newline on STDIN. (Recommended for gather inputs) + ## "SIGHUP" : Send a HUP signal. Not available on Windows. (not recommended) + ## "SIGUSR1" : Send a USR1 signal. Not available on Windows. + ## "SIGUSR2" : Send a USR2 signal. Not available on Windows. + signal = "STDIN" + + ## Delay before the process is restarted after an unexpected termination + restart_delay = "10s" + + ## Data format to consume. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" +``` + +### Example telegraf exec configuration +```toml +# # Read metrics from zpool_influxdb +[[inputs.exec]] +# ## default installation location for zpool_influxdb command + commands = ["/usr/bin/zpool_influxdb"] + data_format = "influx" +``` + +## Caveat Emptor +* Like the _zpool_ command, _zpool_influxdb_ takes a reader + lock on spa_config for each imported pool. If this lock blocks, + then the command will also block indefinitely and might be + unkillable. This is not a normal condition, but can occur if + there are bugs in the kernel modules. 
+ For this reason, care should be taken: + * avoid spawning many of these commands hoping that one might + finish + * avoid frequent updates or short sample time + intervals, because the locks can interfere with the performance + of other instances of _zpool_ or _zpool_influxdb_ + +## Other collectors +There are a few other collectors for zpool statistics roaming around +the Internet. Many attempt to screen-scrape `zpool` output in various +ways. The screen-scrape method works poorly for `zpool` output because +of its human-friendly nature. Also, they suffer from the same caveats +as this implementation. This implementation is optimized for directly +collecting the metrics and is much more efficient than the screen-scrapers. + +## Feedback Encouraged +Pull requests and issues are greatly appreciated at +https://github.com/openzfs/zfs diff --git a/cmd/zpool_influxdb/dashboards/README.md b/cmd/zpool_influxdb/dashboards/README.md new file mode 100644 index 000000000..2fdbe4983 --- /dev/null +++ b/cmd/zpool_influxdb/dashboards/README.md @@ -0,0 +1,3 @@ +### Dashboards for zpool_influxdb +This directory contains a collection of dashboards related to ZFS with data +collected from the zpool_influxdb collector. diff --git a/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json b/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json new file mode 100644 index 000000000..a99f92783 --- /dev/null +++ b/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json @@ -0,0 +1,1667 @@ +{ + "__inputs": [ + { + "name": "DS_MACBOOK-INFLUX", + "label": "macbook-influx", + "description": "", + "type": "datasource", + "pluginId": "influxdb", + "pluginName": "InfluxDB" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.3" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "influxdb", + "name": "InfluxDB", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "jdbranham-diagram-panel", + "name": "Diagram", + "version": "1.4.5" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:1627", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Top-level ZFS pool latency by ZIO type", + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "iteration": 1590445168391, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": "${DS_MACBOOK-INFLUX}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "panels": [], + "title": "Total Reads and Writes", + "type": "row" + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the total reads of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 2, + "legend": { + "show": true + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": 
"$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "total_read" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Reads", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the total writes of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 3, + "legend": { + "show": true + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "total_write" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Writes", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "collapsed": false, + "datasource": "${DS_MACBOOK-INFLUX}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 8, + "panels": [], + "title": "ZIO Scheduler Queues for Read Operations", + "type": "row" + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the 
synchronous reads of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 0, + "y": 11 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 6, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "sync_read" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Read Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the asynchronous reads of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 5, + "y": 11 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 9, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "async_read" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Async Read Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": 
"spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the scrub or scan reads of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 10, + "y": 11 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 10, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "scrub" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Scrub/Scan Read Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the actual disk reads of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 15, + "y": 11 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 11, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "disk_read" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Read Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "collapsed": false, + "datasource": "${DS_MACBOOK-INFLUX}", + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 13, + "panels": [], + "title": "ZIO Scheduler Queues for Write Operations", + "type": "row" + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the synchronous writes of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 0, + "y": 20 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 14, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "sync_write" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Write Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the asynchronous writes of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 5, + "y": 20 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 15, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "async_write" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Async Write Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + 
"xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the trim or unmap operations of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 10, + "y": 20 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 16, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "trim" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Trim Write Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_MACBOOK-INFLUX}", + "description": "Latency histogram for the disk write operations of a ZFS pool", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 15, + "y": 20 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 17, + "legend": { + "show": false + }, + "reverseYBuckets": false, + "targets": [ + { + "alias": "$tag_le", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "le" + ], + "type": "tag" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "zpool_latency", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "disk_write" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + }, + { + "params": [ + "1s" + ], + "type": "non_negative_derivative" + } + ] + ], + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$hostname$/" + }, + { + "condition": "AND", + "key": "name", + "operator": "=~", + "value": "/^$poolname$/" + } + ] + } + ], + "timeFrom": null, + 
"timeShift": null, + "title": "Disk Write Queue", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "collapsed": false, + "datasource": "${DS_MACBOOK-INFLUX}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 19, + "panels": [], + "title": "About", + "type": "row" + }, + { + "content": "I/O requests that are satisfied by accessing pool devices are managed by the ZIO scheduler.\nThe total latency is measured from the start of the I/O to completion by the disk.\nLatency through each queue is shown prior to its submission to the disk queue.\n\nThis view is useful for observing the effects of tuning the ZIO scheduler min and max values\n(see zfs-module-parameters(5) and [ZFS on Linux Module Parameters](https://openzfs.github.io/openzfs-docs/Performance%20and%20tuning/ZFS%20on%20Linux%20Module%20Parameters.html)):\n+ *zfs_vdev_max_active* controls the ZIO scheduler's disk queue depth (do not confuse with the block device's nr_requests)\n+ *zfs_vdev_sync_read_min_active* and *zfs_vdev_sync_read_max_active* control the synchronous queue for reads: most reads are sync\n+ *zfs_vdev_sync_write_min_active* and *zfs_vdev_sync_write_max_active* control the synchronous queue for writes: \nusually metadata or user data depending on the \"sync\" property setting or I/Os that are requested to be flushed\n+ *zfs_vdev_async_read_min_active* and *zfs_vdev_async_read_max_active* control the asynchronous queue for reads: usually prefetches\n+ *zfs_vdev_async_write_min_active* and *zfs_vdev_async_write_max_active* control the asynchronous queue for writes: \nusually the bulk of all writes at transaction group (txg) commit\n+ *zfs_vdev_scrub_min_active* and *zfs_vdev_scrub_max_active* controls the scan reads: usually scrub or resilver\n\n", + "datasource": "${DS_MACBOOK-INFLUX}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 16, + "x": 0, + "y": 29 + }, + "id": 21, + "mode": "markdown", + "targets": [ + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "About ZFS Pool All Queues Read/Write Latency Histograms", + "type": "text" + }, + { + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "composites": [], + "content": "graph LR\nIO((I/O request)) --> SR(sync read queue)\nIO --> SW(sync write queue)\nIO --> AR(async read queue)\nIO --> AW(async write queue)\nIO --> SCRUB(scrub queue)\nIO --> TRIM(trim queue)\nSR --> DISKQ(disk queue)\nSW --> DISKQ\nAR --> DISKQ\nAW --> DISKQ\nSCRUB --> DISKQ\nTRIM --> DISKQ\nDISKQ --> DISK((disk))\n", + "datasource": "${DS_MACBOOK-INFLUX}", + "decimals": 2, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "graphId": "diagram_23", + "gridPos": { + "h": 15, + "w": 7, + "x": 16, + "y": 29 + }, + 
"id": 23, + "init": { + "arrowMarkerAbsolute": true, + "cloneCssStyles": true, + "flowchart": { + "htmlLabels": true, + "useMaxWidth": true + }, + "gantt": { + "barGap": 4, + "barHeight": 20, + "fontFamily": "\"Open-Sans\", \"sans-serif\"", + "fontSize": 11, + "gridLineStartPadding": 35, + "leftPadding": 75, + "numberSectionStyles": 3, + "titleTopMargin": 25, + "topPadding": 50 + }, + "logLevel": 3, + "securityLevel": "loose", + "sequence": { + "actorMargin": 50, + "bottomMarginAdj": 1, + "boxMargin": 10, + "boxTextMargin": 5, + "diagramMarginX": 50, + "diagramMarginY": 10, + "height": 65, + "messageMargin": 35, + "mirrorActors": true, + "noteMargin": 10, + "useMaxWidth": true, + "width": 150 + }, + "startOnLoad": false, + "theme": "dark" + }, + "legend": { + "avg": true, + "current": true, + "gradient": { + "enabled": true, + "show": true + }, + "max": true, + "min": true, + "show": false, + "total": true + }, + "mappingType": 1, + "mappingTypes": [ + { + "$$hashKey": "object:155", + "name": "value to text", + "value": 1 + }, + { + "$$hashKey": "object:156", + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxWidth": false, + "mermaidServiceUrl": "", + "metricCharacterReplacements": [], + "moddedSeriesVal": 0, + "mode": "content", + "nullPointMode": "connected", + "seriesOverrides": [], + "style": "", + "styleValues": {}, + "targets": [ + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": true, + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "themes": [ + "default", + "dark", + "forest", + "neutral" + ], + "thresholds": "0,10", + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "type": "jdbranham-diagram-panel", + "valueMaps": [ + { + "$$hashKey": "object:151", + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg", + "valueOptions": [ + "avg", + "min", + "max", + "total", + "current" + ] + } + ], + "refresh": false, + "schemaVersion": 22, + "style": "dark", + "tags": [ + "ZFS", + "Latency", + "Histogram" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_MACBOOK-INFLUX}", + "definition": "show tag values from \"zpool_latency\" with key = \"host\"", + "hide": 0, + "includeAll": false, + "index": -1, + "label": null, + "multi": false, + "name": "hostname", + "options": [], + "query": "show tag values from \"zpool_latency\" with key = \"host\"", + "refresh": 1, + "regex": "/([-a-zA-Z-0-9]+)/", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_MACBOOK-INFLUX}", + "definition": "show tag values from \"zpool_latency\" with key = \"name\" where \"host\" =~ /^$hostname/", + "hide": 0, + "includeAll": false, + "index": -1, + "label": null, + "multi": false, + "name": "poolname", + "options": [], + "query": "show tag values from \"zpool_latency\" with key = \"name\" where \"host\" =~ /^$hostname/", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "2020-05-25T21:34:30.137Z", + "to": 
"2020-05-25T21:39:54.445Z" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "ZFS Pool Latency Heatmaps Influxdb", + "uid": "TbB4-DkGz", + "variables": { + "list": [] + }, + "version": 2 +} \ No newline at end of file diff --git a/cmd/zpool_influxdb/telegraf.d/README.md b/cmd/zpool_influxdb/telegraf.d/README.md new file mode 100644 index 000000000..74f411a15 --- /dev/null +++ b/cmd/zpool_influxdb/telegraf.d/README.md @@ -0,0 +1,7 @@ +This directory contains sample telegraf configurations for +adding `zpool_influxdb` as an input plugin. Depending on your +telegraf configuration, the installation can be as simple as +copying one of these to the `/etc/telegraf/telegraf.d` directory +and restarting `systemctl restart telegraf` + +See the telegraf docs for more information on input plugins. diff --git a/cmd/zpool_influxdb/telegraf.d/exec_zpool_influxdb.conf b/cmd/zpool_influxdb/telegraf.d/exec_zpool_influxdb.conf new file mode 100644 index 000000000..e934b4e88 --- /dev/null +++ b/cmd/zpool_influxdb/telegraf.d/exec_zpool_influxdb.conf @@ -0,0 +1,15 @@ +# # Read metrics from zpool_influxdb +[[inputs.exec]] +# ## default installation location for zpool_influxdb command + commands = ["/usr/local/bin/zpool_influxdb"] +# ## Timeout for each command to complete. +# timeout = "5s" +# +# ## measurement name suffix (for separating different commands) +# name_suffix = "_mycollector" +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" diff --git a/cmd/zpool_influxdb/telegraf.d/execd_zpool_influxdb.conf b/cmd/zpool_influxdb/telegraf.d/execd_zpool_influxdb.conf new file mode 100644 index 000000000..e56c51fc1 --- /dev/null +++ b/cmd/zpool_influxdb/telegraf.d/execd_zpool_influxdb.conf @@ -0,0 +1,23 @@ +# # Read metrics from zpool_influxdb +[[inputs.execd]] +# ## default installation location for zpool_influxdb command + command = ["/usr/local/bin/zpool_influxdb", "--execd"] + + ## Define how the process is signaled on each collection interval. + ## Valid values are: + ## "none" : Do not signal anything. (Recommended for service inputs) + ## The process must output metrics by itself. + ## "STDIN" : Send a newline on STDIN. (Recommended for gather inputs) + ## "SIGHUP" : Send a HUP signal. Not available on Windows. (not recommended) + ## "SIGUSR1" : Send a USR1 signal. Not available on Windows. + ## "SIGUSR2" : Send a USR2 signal. Not available on Windows. + signal = "STDIN" + + ## Delay before the process is restarted after an unexpected termination + restart_delay = "10s" + + ## Data format to consume. 
+ ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" diff --git a/cmd/zpool_influxdb/zpool_influxdb.c b/cmd/zpool_influxdb/zpool_influxdb.c new file mode 100644 index 000000000..71ffcb253 --- /dev/null +++ b/cmd/zpool_influxdb/zpool_influxdb.c @@ -0,0 +1,843 @@ +/* + * Gather top-level ZFS pool and resilver/scan statistics and print using + * influxdb line protocol + * usage: [options] [pool_name] + * where options are: + * --execd, -e run in telegraf execd input plugin mode, [CR] on + * stdin causes a sample to be printed and wait for + * the next [CR] + * --no-histograms, -n don't print histogram data (reduces cardinality + * if you don't care about histograms) + * --sum-histogram-buckets, -s sum histogram bucket values + * + * To integrate into telegraf use one of: + * 1. the `inputs.execd` plugin with the `--execd` option + * 2. the `inputs.exec` plugin to simply run with no options + * + * NOTE: libzfs is an unstable interface. YMMV. + * + * The design goals of this software include: + * + be as lightweight as possible + * + reduce the number of external dependencies as far as possible, hence + * there is no dependency on a client library for managing the metric + * collection -- info is printed, KISS + * + broken pools or kernel bugs can cause this process to hang in an + * unkillable state. For this reason, it is best to keep the damage limited + * to a small process like zpool_influxdb rather than a larger collector. + * + * Copyright 2018-2020 Richard Elling + * + * This software is dual-licensed MIT and CDDL. + * + * The MIT License (MIT) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License Version 1.0 (CDDL-1.0). + * You can obtain a copy of the license from the top-level file + * "OPENSOLARIS.LICENSE" or at . + * You may not use this file except in compliance with the license. + * + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * CDDL HEADER END + */ +#include +#include +#include +#include +#include +#include + +#define POOL_MEASUREMENT "zpool_stats" +#define SCAN_MEASUREMENT "zpool_scan_stats" +#define VDEV_MEASUREMENT "zpool_vdev_stats" +#define POOL_LATENCY_MEASUREMENT "zpool_latency" +#define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue" +#define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */ +#define POOL_IO_SIZE_MEASUREMENT "zpool_io_size" +#define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */ + +/* global options */ +int execd_mode = 0; +int no_histograms = 0; +int sum_histogram_buckets = 0; +char metric_data_type = 'u'; +uint64_t metric_value_mask = UINT64_MAX; +uint64_t timestamp = 0; +int complained_about_sync = 0; +char *tags = ""; + +typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *); + +/* + * influxdb line protocol rules for escaping are important because the + * zpool name can include characters that need to be escaped + * + * caller is responsible for freeing result + */ +static char * +escape_string(char *s) +{ + char *c, *d; + char *t = (char *)malloc(ZFS_MAX_DATASET_NAME_LEN * 2); + if (t == NULL) { + fprintf(stderr, "error: cannot allocate memory\n"); + exit(1); + } + + for (c = s, d = t; *c != '\0'; c++, d++) { + switch (*c) { + case ' ': + case ',': + case '=': + case '\\': + *d++ = '\\'; + default: + *d = *c; + } + } + *d = '\0'; + return (t); +} + +/* + * print key=value where value is a uint64_t + */ +static void +print_kv(char *key, uint64_t value) +{ + printf("%s=%llu%c", key, + (u_longlong_t)value & metric_value_mask, metric_data_type); +} + +/* + * print_scan_status() prints the details as often seen in the "zpool status" + * output. However, unlike the zpool command, which is intended for humans, + * this output is suitable for long-term tracking in influxdb. + * TODO: update to include issued scan data + */ +static int +print_scan_status(nvlist_t *nvroot, const char *pool_name) +{ + uint_t c; + int64_t elapsed; + uint64_t examined, pass_exam, paused_time, paused_ts, rate; + uint64_t remaining_time; + pool_scan_stat_t *ps = NULL; + double pct_done; + char *state[DSS_NUM_STATES] = { + "none", "scanning", "finished", "canceled"}; + char *func; + + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_SCAN_STATS, + (uint64_t **)&ps, &c); + + /* + * ignore if there are no stats + */ + if (ps == NULL) + return (0); + + /* + * return error if state is bogus + */ + if (ps->pss_state >= DSS_NUM_STATES || + ps->pss_func >= POOL_SCAN_FUNCS) { + if (complained_about_sync % 1000 == 0) { + fprintf(stderr, "error: cannot decode scan stats: " + "ZFS is out of sync with compiled zpool_influxdb"); + complained_about_sync++; + } + return (1); + } + + switch (ps->pss_func) { + case POOL_SCAN_NONE: + func = "none_requested"; + break; + case POOL_SCAN_SCRUB: + func = "scrub"; + break; + case POOL_SCAN_RESILVER: + func = "resilver"; + break; +#ifdef POOL_SCAN_REBUILD + case POOL_SCAN_REBUILD: + func = "rebuild"; + break; +#endif + default: + func = "scan"; + } + + /* overall progress */ + examined = ps->pss_examined ? 
ps->pss_examined : 1; + pct_done = 0.0; + if (ps->pss_to_examine > 0) + pct_done = 100.0 * examined / ps->pss_to_examine; + +#ifdef EZFS_SCRUB_PAUSED + paused_ts = ps->pss_pass_scrub_pause; + paused_time = ps->pss_pass_scrub_spent_paused; +#else + paused_ts = 0; + paused_time = 0; +#endif + + /* calculations for this pass */ + if (ps->pss_state == DSS_SCANNING) { + elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start - + (int64_t)paused_time; + elapsed = (elapsed > 0) ? elapsed : 1; + pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; + rate = pass_exam / elapsed; + rate = (rate > 0) ? rate : 1; + remaining_time = ps->pss_to_examine - examined / rate; + } else { + elapsed = + (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start - + (int64_t)paused_time; + elapsed = (elapsed > 0) ? elapsed : 1; + pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; + rate = pass_exam / elapsed; + remaining_time = 0; + } + rate = rate ? rate : 1; + + /* influxdb line protocol format: "tags metrics timestamp" */ + printf("%s%s,function=%s,name=%s,state=%s ", + SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]); + print_kv("end_ts", ps->pss_end_time); + print_kv(",errors", ps->pss_errors); + print_kv(",examined", examined); + print_kv(",issued", ps->pss_issued); + print_kv(",pass_examined", pass_exam); + print_kv(",pass_issued", ps->pss_pass_issued); + print_kv(",paused_ts", paused_ts); + print_kv(",paused_t", paused_time); + printf(",pct_done=%.2f", pct_done); + print_kv(",processed", ps->pss_processed); + print_kv(",rate", rate); + print_kv(",remaining_t", remaining_time); + print_kv(",start_ts", ps->pss_start_time); + print_kv(",to_examine", ps->pss_to_examine); + print_kv(",to_process", ps->pss_to_process); + printf(" %llu\n", (u_longlong_t)timestamp); + return (0); +} + +/* + * get a vdev name that corresponds to the top-level vdev names + * printed by `zpool status` + */ +static char * +get_vdev_name(nvlist_t *nvroot, const char *parent_name) +{ + static char vdev_name[256]; + char *vdev_type = NULL; + uint64_t vdev_id = 0; + + if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, + &vdev_type) != 0) { + vdev_type = "unknown"; + } + if (nvlist_lookup_uint64( + nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { + vdev_id = UINT64_MAX; + } + if (parent_name == NULL) { + (void) snprintf(vdev_name, sizeof (vdev_name), "%s", + vdev_type); + } else { + (void) snprintf(vdev_name, sizeof (vdev_name), + "%s/%s-%llu", + parent_name, vdev_type, (u_longlong_t)vdev_id); + } + return (vdev_name); +} + +/* + * get a string suitable for an influxdb tag that describes this vdev + * + * By default only the vdev hierarchical name is shown, separated by '/' + * If the vdev has an associated path, which is typical of leaf vdevs, + * then the path is added. + * It would be nice to have the devid instead of the path, but under + * Linux we cannot be sure a devid will exist and we'd rather have + * something than nothing, so we'll use path instead. 
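+ *
+ * The result is either "vdev=<type>" (e.g. "vdev=root" for the whole
+ * pool) or "vdev=<parent>/<type>-<id>", prefixed with "path=<path>,"
+ * when the vdev has a path (angle brackets denote placeholders).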
+ */ +static char * +get_vdev_desc(nvlist_t *nvroot, const char *parent_name) +{ + static char vdev_desc[2 * MAXPATHLEN]; + char *vdev_type = NULL; + uint64_t vdev_id = 0; + char vdev_value[MAXPATHLEN]; + char *vdev_path = NULL; + char *s, *t; + + if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type) != 0) { + vdev_type = "unknown"; + } + if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { + vdev_id = UINT64_MAX; + } + if (nvlist_lookup_string( + nvroot, ZPOOL_CONFIG_PATH, &vdev_path) != 0) { + vdev_path = NULL; + } + + if (parent_name == NULL) { + s = escape_string(vdev_type); + (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s); + free(s); + } else { + s = escape_string((char *)parent_name); + t = escape_string(vdev_type); + (void) snprintf(vdev_value, sizeof (vdev_value), + "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id); + free(s); + free(t); + } + if (vdev_path == NULL) { + (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s", + vdev_value); + } else { + s = escape_string(vdev_path); + (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s", + s, vdev_value); + free(s); + } + return (vdev_desc); +} + +/* + * vdev summary stats are a combination of the data shown by + * `zpool status` and `zpool list -v` + */ +static int +print_summary_stats(nvlist_t *nvroot, const char *pool_name, + const char *parent_name) +{ + uint_t c; + vdev_stat_t *vs; + char *vdev_desc = NULL; + vdev_desc = get_vdev_desc(nvroot, parent_name); + if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) != 0) { + return (1); + } + printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags, + pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state, + (vdev_aux_t)vs->vs_aux), vdev_desc); + print_kv("alloc", vs->vs_alloc); + print_kv(",free", vs->vs_space - vs->vs_alloc); + print_kv(",size", vs->vs_space); + print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]); + print_kv(",read_errors", vs->vs_read_errors); + print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]); + print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]); + print_kv(",write_errors", vs->vs_write_errors); + print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]); + print_kv(",checksum_errors", vs->vs_checksum_errors); + print_kv(",fragmentation", vs->vs_fragmentation); + printf(" %llu\n", (u_longlong_t)timestamp); + return (0); +} + +/* + * vdev latency stats are histograms stored as nvlist arrays of uint64. + * Latency stats include the ZIO scheduler classes plus lower-level + * vdev latencies. + * + * In many cases, the top-level "root" view obscures the underlying + * top-level vdev operations. For example, if a pool has a log, special, + * or cache device, then each can behave very differently. It is useful + * to see how each is responding. 
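+ *
+ * Each printed bucket is tagged with its upper bound in seconds, e.g.
+ * bucket 20 (2^20 ns) becomes le=0.001049; buckets below MIN_LAT_INDEX
+ * are folded into the first printed bucket, and the final bucket is
+ * tagged le=+Inf.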
+ */ +static int +print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name, + const char *parent_name) +{ + uint_t c, end = 0; + nvlist_t *nv_ex; + char *vdev_desc = NULL; + + /* short_names become part of the metric name and are influxdb-ready */ + struct lat_lookup { + char *name; + char *short_name; + uint64_t sum; + uint64_t *array; + }; + struct lat_lookup lat_type[] = { + {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0}, + {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0}, + {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0}, + {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0}, + {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0}, + {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0}, + {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0}, + {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0}, + {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0}, +#ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO + {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0}, +#endif + {NULL, NULL} + }; + + if (nvlist_lookup_nvlist(nvroot, + ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { + return (6); + } + + vdev_desc = get_vdev_desc(nvroot, parent_name); + + for (int i = 0; lat_type[i].name; i++) { + if (nvlist_lookup_uint64_array(nv_ex, + lat_type[i].name, &lat_type[i].array, &c) != 0) { + fprintf(stderr, "error: can't get %s\n", + lat_type[i].name); + return (3); + } + /* end count count, all of the arrays are the same size */ + end = c - 1; + } + + for (int bucket = 0; bucket <= end; bucket++) { + if (bucket < MIN_LAT_INDEX) { + /* don't print, but collect the sum */ + for (int i = 0; lat_type[i].name; i++) { + lat_type[i].sum += lat_type[i].array[bucket]; + } + continue; + } + if (bucket < end) { + printf("%s%s,le=%0.6f,name=%s,%s ", + POOL_LATENCY_MEASUREMENT, tags, + (float)(1ULL << bucket) * 1e-9, + pool_name, vdev_desc); + } else { + printf("%s%s,le=+Inf,name=%s,%s ", + POOL_LATENCY_MEASUREMENT, tags, pool_name, + vdev_desc); + } + for (int i = 0; lat_type[i].name; i++) { + if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) { + lat_type[i].sum += lat_type[i].array[bucket]; + } else { + lat_type[i].sum = lat_type[i].array[bucket]; + } + print_kv(lat_type[i].short_name, lat_type[i].sum); + if (lat_type[i + 1].name != NULL) { + printf(","); + } + } + printf(" %llu\n", (u_longlong_t)timestamp); + } + return (0); +} + +/* + * vdev request size stats are histograms stored as nvlist arrays of uint64. + * Request size stats include the ZIO scheduler classes plus lower-level + * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported. + * + * In many cases, the top-level "root" view obscures the underlying + * top-level vdev operations. For example, if a pool has a log, special, + * or cache device, then each can behave very differently. It is useful + * to see how each is responding. 
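+ *
+ * Each printed bucket is tagged with its upper bound in bytes, e.g.
+ * bucket 12 becomes le=4096; buckets below MIN_SIZE_INDEX (512 bytes)
+ * are folded into the first printed bucket, and the final bucket is
+ * tagged le=+Inf.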
+ */ +static int +print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name, + const char *parent_name) +{ + uint_t c, end = 0; + nvlist_t *nv_ex; + char *vdev_desc = NULL; + + /* short_names become the field name */ + struct size_lookup { + char *name; + char *short_name; + uint64_t sum; + uint64_t *array; + }; + struct size_lookup size_type[] = { + {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"}, + {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"}, + {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"}, + {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"}, + {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"}, + {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"}, + {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"}, + {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"}, + {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"}, + {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"}, +#ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO + {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"}, + {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"}, +#endif + {NULL, NULL} + }; + + if (nvlist_lookup_nvlist(nvroot, + ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { + return (6); + } + + vdev_desc = get_vdev_desc(nvroot, parent_name); + + for (int i = 0; size_type[i].name; i++) { + if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name, + &size_type[i].array, &c) != 0) { + fprintf(stderr, "error: can't get %s\n", + size_type[i].name); + return (3); + } + /* end count count, all of the arrays are the same size */ + end = c - 1; + } + + for (int bucket = 0; bucket <= end; bucket++) { + if (bucket < MIN_SIZE_INDEX) { + /* don't print, but collect the sum */ + for (int i = 0; size_type[i].name; i++) { + size_type[i].sum += size_type[i].array[bucket]; + } + continue; + } + + if (bucket < end) { + printf("%s%s,le=%llu,name=%s,%s ", + POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket, + pool_name, vdev_desc); + } else { + printf("%s%s,le=+Inf,name=%s,%s ", + POOL_IO_SIZE_MEASUREMENT, tags, pool_name, + vdev_desc); + } + for (int i = 0; size_type[i].name; i++) { + if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) { + size_type[i].sum += size_type[i].array[bucket]; + } else { + size_type[i].sum = size_type[i].array[bucket]; + } + print_kv(size_type[i].short_name, size_type[i].sum); + if (size_type[i + 1].name != NULL) { + printf(","); + } + } + printf(" %llu\n", (u_longlong_t)timestamp); + } + return (0); +} + +/* + * ZIO scheduler queue stats are stored as gauges. This is unfortunate + * because the values can change very rapidly and any point-in-time + * value will quickly be obsoleted. It is also not easy to downsample. + * Thus only the top-level queue stats might be beneficial... maybe. 
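+ *
+ * All queue gauges for a vdev are emitted as fields of a single point,
+ * for example (values are illustrative only):
+ *   zpool_vdev_queue,name=tank,vdev=root sync_r_active=0u,...,async_scrub_pend=0u 1595178997000000000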
+ */ +static int +print_queue_stats(nvlist_t *nvroot, const char *pool_name, + const char *parent_name) +{ + nvlist_t *nv_ex; + uint64_t value; + + /* short_names are used for the field name */ + struct queue_lookup { + char *name; + char *short_name; + }; + struct queue_lookup queue_type[] = { + {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"}, + {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"}, + {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"}, + {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"}, + {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"}, + {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"}, + {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"}, + {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"}, + {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"}, + {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"}, + {NULL, NULL} + }; + + if (nvlist_lookup_nvlist(nvroot, + ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { + return (6); + } + + printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name, + get_vdev_desc(nvroot, parent_name)); + for (int i = 0; queue_type[i].name; i++) { + if (nvlist_lookup_uint64(nv_ex, + queue_type[i].name, &value) != 0) { + fprintf(stderr, "error: can't get %s\n", + queue_type[i].name); + return (3); + } + print_kv(queue_type[i].short_name, value); + if (queue_type[i + 1].name != NULL) { + printf(","); + } + } + printf(" %llu\n", (u_longlong_t)timestamp); + return (0); +} + +/* + * top-level vdev stats are at the pool level + */ +static int +print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name) +{ + nvlist_t *nv_ex; + uint64_t value; + + /* short_names become part of the metric name */ + struct queue_lookup { + char *name; + char *short_name; + }; + struct queue_lookup queue_type[] = { + {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"}, + {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"}, + {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"}, + {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"}, + {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"}, + {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"}, + {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"}, + {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"}, + {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"}, + {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"}, + {NULL, NULL} + }; + + if (nvlist_lookup_nvlist(nvroot, + ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) { + return (6); + } + + printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags, + pool_name); + for (int i = 0; queue_type[i].name; i++) { + if (nvlist_lookup_uint64(nv_ex, + queue_type[i].name, &value) != 0) { + fprintf(stderr, "error: can't get %s\n", + queue_type[i].name); + return (3); + } + if (i > 0) + printf(","); + print_kv(queue_type[i].short_name, value); + } + + printf(" %llu\n", (u_longlong_t)timestamp); + return (0); +} + +/* + * recursive stats printer + */ +static int +print_recursive_stats(stat_printer_f func, nvlist_t *nvroot, + const char *pool_name, const char *parent_name, int descend) +{ + uint_t c, children; + nvlist_t **child; + char vdev_name[256]; + int err; + + err = func(nvroot, pool_name, parent_name); + if (err) + return (err); + + if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + (void) strncpy(vdev_name, get_vdev_name(nvroot, parent_name), + sizeof (vdev_name)); + 
vdev_name[sizeof (vdev_name) - 1] = '\0'; + + for (c = 0; c < children; c++) { + print_recursive_stats(func, child[c], pool_name, + vdev_name, descend); + } + } + return (0); +} + +/* + * call-back to print the stats from the pool config + * + * Note: if the pool is broken, this can hang indefinitely and perhaps in an + * unkillable state. + */ +static int +print_stats(zpool_handle_t *zhp, void *data) +{ + uint_t c; + int err; + boolean_t missing; + nvlist_t *config, *nvroot; + vdev_stat_t *vs; + struct timespec tv; + char *pool_name; + + /* if not this pool return quickly */ + if (data && + strncmp(data, zhp->zpool_name, ZFS_MAX_DATASET_NAME_LEN) != 0) { + zpool_close(zhp); + return (0); + } + + if (zpool_refresh_stats(zhp, &missing) != 0) { + zpool_close(zhp); + return (1); + } + + config = zpool_get_config(zhp, NULL); + if (clock_gettime(CLOCK_REALTIME, &tv) != 0) + timestamp = (uint64_t)time(NULL) * 1000000000; + else + timestamp = + ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec; + + if (nvlist_lookup_nvlist( + config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { + zpool_close(zhp); + return (2); + } + if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) != 0) { + zpool_close(zhp); + return (3); + } + + pool_name = escape_string(zhp->zpool_name); + err = print_recursive_stats(print_summary_stats, nvroot, + pool_name, NULL, 1); + /* if any of these return an error, skip the rest */ + if (err == 0) + err = print_top_level_vdev_stats(nvroot, pool_name); + + if (no_histograms == 0) { + if (err == 0) + err = print_recursive_stats(print_vdev_latency_stats, nvroot, + pool_name, NULL, 1); + if (err == 0) + err = print_recursive_stats(print_vdev_size_stats, nvroot, + pool_name, NULL, 1); + if (err == 0) + err = print_recursive_stats(print_queue_stats, nvroot, + pool_name, NULL, 0); + } + if (err == 0) + err = print_scan_status(nvroot, pool_name); + + free(pool_name); + zpool_close(zhp); + return (err); +} + +static void +usage(char *name) +{ + fprintf(stderr, "usage: %s [--execd][--no-histograms]" + "[--sum-histogram-buckets] [--signed-int] [poolname]\n", name); + exit(EXIT_FAILURE); +} + +int +main(int argc, char *argv[]) +{ + int opt; + int ret = 8; + char *line = NULL; + size_t len, tagslen = 0; + struct option long_options[] = { + {"execd", no_argument, NULL, 'e'}, + {"help", no_argument, NULL, 'h'}, + {"no-histograms", no_argument, NULL, 'n'}, + {"signed-int", no_argument, NULL, 'i'}, + {"sum-histogram-buckets", no_argument, NULL, 's'}, + {"tags", required_argument, NULL, 't'}, + {0, 0, 0, 0} + }; + while ((opt = getopt_long( + argc, argv, "ehinst:", long_options, NULL)) != -1) { + switch (opt) { + case 'e': + execd_mode = 1; + break; + case 'i': + metric_data_type = 'i'; + metric_value_mask = INT64_MAX; + break; + case 'n': + no_histograms = 1; + break; + case 's': + sum_histogram_buckets = 1; + break; + case 't': + tagslen = strlen(optarg) + 2; + tags = calloc(tagslen, 1); + if (tags == NULL) { + fprintf(stderr, + "error: cannot allocate memory " + "for tags\n"); + exit(1); + } + (void) snprintf(tags, tagslen, ",%s", optarg); + break; + default: + usage(argv[0]); + } + } + + libzfs_handle_t *g_zfs; + if ((g_zfs = libzfs_init()) == NULL) { + fprintf(stderr, + "error: cannot initialize libzfs. 
" + "Is the zfs module loaded or zrepl running?\n"); + exit(EXIT_FAILURE); + } + if (execd_mode == 0) { + ret = zpool_iter(g_zfs, print_stats, argv[optind]); + return (ret); + } + while (getline(&line, &len, stdin) != -1) { + ret = zpool_iter(g_zfs, print_stats, argv[optind]); + fflush(stdout); + } + return (ret); +} diff --git a/configure.ac b/configure.ac index cd822abc0..47f0f2f50 100644 --- a/configure.ac +++ b/configure.ac @@ -86,6 +86,7 @@ AC_CONFIG_FILES([ cmd/ztest/Makefile cmd/zvol_id/Makefile cmd/zvol_wait/Makefile + cmd/zpool_influxdb/Makefile contrib/Makefile contrib/bash_completion.d/Makefile contrib/bpftrace/Makefile @@ -394,6 +395,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/vdev_zaps/Makefile tests/zfs-tests/tests/functional/write_dirs/Makefile tests/zfs-tests/tests/functional/xattr/Makefile + tests/zfs-tests/tests/functional/zpool_influxdb/Makefile tests/zfs-tests/tests/functional/zvol/Makefile tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/Makefile tests/zfs-tests/tests/functional/zvol/zvol_cli/Makefile diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am index 07f6aefa6..602645180 100644 --- a/man/man8/Makefile.am +++ b/man/man8/Makefile.am @@ -82,7 +82,8 @@ dist_man_MANS = \ zpool-upgrade.8 \ zpool-wait.8 \ zstream.8 \ - zstreamdump.8 + zstreamdump.8 \ + zpool_influxdb.8 nodist_man_MANS = \ zed.8 \ diff --git a/man/man8/zpool_influxdb.8 b/man/man8/zpool_influxdb.8 new file mode 100644 index 000000000..bd899dbe9 --- /dev/null +++ b/man/man8/zpool_influxdb.8 @@ -0,0 +1,93 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at +.\" https://opensource.org/licenses/CDDL-1.0 +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" +.\" Copyright 2020 Richard Elling +.\" .Dd June 14, 2020 +.TH zpool_influxdb 8 +.SH NAME +zpool_influxdb \- collect zpool statistics in influxdb line protocol format +.SH SYNOPSIS +.LP +.nf +\fBzpool_influxdb\fR [--execd] [--no-histogram] [--sum-histogram-buckets] +[--tags key=value] [pool] +\fBzpool_influxdb\fR --help +.fi +.SH DESCRIPTION +The \fBzpool_influxdb\fR command produces influxdb line protocol compatible +metrics from zpools. Like the \fBzpool\fR command, \fBzpool_influxdb\fR +reads the current pool status and statistics. Unlike the \fBzpool\fR +command which is intended for humans, \fBzpool_influxdb\fR formats the +output in influxdb line protocol. The expected use is as a plugin to a +metrics collector or aggregator, such as telegraf. + +By default, \fBzpool_influxdb\fR prints pool metrics and status in the +influxdb line protocol format. All pools are printed, similar to +the \fBzpool status\fR command. Providing a pool name restricts the +output to the named pool. + +Like the \fBzpool\fR command, \fBzpool_influxdb\fR uses internal data +structures that can change over time as new ZFS releases are made. 
+Therefore, the \fBzpool_influxdb\fR command must be compiled against the
+ZFS source. It is expected that later releases of ZFS will include compatible
+\fBzpool_influxdb\fR and \fBzpool\fR commands.
+
+.SH OPTIONS
+.TP
+\fB\--execd\fR, \fB-e\fR
+Run in daemon mode compatible with telegraf's \fBexecd\fR plugin.
+In this mode, the pools are sampled every time there is a [return] on stdin.
+Once a sample is printed, \fBzpool_influxdb\fR waits for another [return].
+When run on a terminal, use [ctrl+C] to exit.
+.TP
+\fB\--no-histogram\fR, \fB-n\fR
+Do not print latency and I/O size histograms. This can reduce the total
+amount of data, but one should consider the value brought by the insights
+that latency and I/O size distributions provide. When histograms are
+printed, the values are suitable for graphing with grafana's heatmap plugin.
+.TP
+\fB--sum-histogram-buckets\fR, \fB-s\fR
+Accumulates bucket values. By default, the values are not accumulated and
+the raw data appears as shown by \fBzpool iostat\fR. This works well for
+grafana's heatmap plugin. Summing the buckets produces output similar to
+prometheus histograms.
+.TP
+\fB--tags\fR, \fB-t\fR
+Adds the specified tags to the tag set. Tags are key=value pairs and multiple
+tags are separated by commas. No sanity checking is performed.
+See the InfluxDB Line Protocol format documentation for details on escaping
+special characters used in tags.
+.TP
+\fB\--help\fR, \fB\-h\fR
+Print a usage summary.
+
+.SH SEE ALSO
+.LP
+\fBzpool-status\fR(8)
+\fBzpool-iostat\fR(8)
+.PP
+Influxdb https://github.com/influxdata/influxdb
+.PP
+Telegraf https://github.com/influxdata/telegraf
+.PP
+Grafana https://grafana.com
+.PP
+Prometheus https://prometheus.io
diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
index 86e983718..1ce47e1a1 100644
--- a/rpm/generic/zfs.spec.in
+++ b/rpm/generic/zfs.spec.in
@@ -439,6 +439,7 @@ systemctl --system daemon-reload >/dev/null || true
 %{_bindir}/raidz_test
 %{_bindir}/zgenhostid
 %{_bindir}/zvol_wait
+%{_bindir}/zpool_influxdb
 # Optional Python 2/3 scripts
 %{_bindir}/arc_summary
 %{_bindir}/arcstat
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 228bd5eac..d935f5b04 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -905,3 +905,6 @@ tests = ['l2arc_arcstats_pos', 'l2arc_mfuonly_pos',
     'persist_l2arc_006_pos', 'persist_l2arc_007_pos', 'persist_l2arc_008_pos']
 tags = ['functional', 'l2arc']
+[tests/functional/zpool_influxdb]
+tests = ['zpool_influxdb']
+tags = ['functional', 'zpool_influxdb']
diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
index 5a507b94a..21d1950e7 100644
--- a/tests/zfs-tests/include/commands.cfg
+++ b/tests/zfs-tests/include/commands.cfg
@@ -188,7 +188,8 @@ export ZFS_FILES='zdb
     zgenhostid
     zstream
     zstreamdump
-    zfs_ids_to_path'
+    zfs_ids_to_path
+    zpool_influxdb'
 export ZFSTEST_FILES='badsend
     btree_test
diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am
index 6438898e4..3a5b7b0b9 100644
--- a/tests/zfs-tests/tests/functional/Makefile.am
+++ b/tests/zfs-tests/tests/functional/Makefile.am
@@ -82,6 +82,7 @@ SUBDIRS = \
 	vdev_zaps \
 	write_dirs \
 	xattr \
+	zpool_influxdb \
 	zvol
 if BUILD_LINUX
diff --git a/tests/zfs-tests/tests/functional/zpool_influxdb/Makefile.am b/tests/zfs-tests/tests/functional/zpool_influxdb/Makefile.am
new file mode 100644
index 000000000..36d08a41a
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/zpool_influxdb/Makefile.am
@@ -0,0 +1,5 @@
+pkgdatadir =
$(datadir)/@PACKAGE@/zfs-tests/tests/functional/zpool_influxdb +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + zpool_influxdb.ksh diff --git a/tests/zfs-tests/tests/functional/zpool_influxdb/cleanup.ksh b/tests/zfs-tests/tests/functional/zpool_influxdb/cleanup.ksh new file mode 100755 index 000000000..a8cd2e4b6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zpool_influxdb/cleanup.ksh @@ -0,0 +1,29 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at +# https://opensource.org/licenses/CDDL-1.0 +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2020 Richard Elling +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/zpool_influxdb/setup.ksh b/tests/zfs-tests/tests/functional/zpool_influxdb/setup.ksh new file mode 100755 index 000000000..43f2c8c20 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zpool_influxdb/setup.ksh @@ -0,0 +1,29 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at +# https://opensource.org/licenses/CDDL-1.0 +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2020 Richard Elling +# + +. $STF_SUITE/include/libtest.shlib + +default_raidz_setup $DISKS diff --git a/tests/zfs-tests/tests/functional/zpool_influxdb/zpool_influxdb.ksh b/tests/zfs-tests/tests/functional/zpool_influxdb/zpool_influxdb.ksh new file mode 100755 index 000000000..495a4a38b --- /dev/null +++ b/tests/zfs-tests/tests/functional/zpool_influxdb/zpool_influxdb.ksh @@ -0,0 +1,71 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at +# https://opensource.org/licenses/CDDL-1.0 +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2020 Richard Elling +# + +. $STF_SUITE/include/libtest.shlib + +typeset tmpfile=$TEST_BASE_DIR/zpool_influxdb.out.$$ +function cleanup +{ + if [[ -f $tmpfile ]]; then + rm -f $tmpfile + fi +} +log_onexit cleanup + +log_assert "zpool_influxdb gathers statistics" + +if ! is_global_zone ; then + TESTPOOL=${TESTPOOL%%/*} +fi + +function check_for +{ + grep "^${1}," $tmpfile >/dev/null 2>/dev/null + if [ $? -ne 0 ]; then + log_fail "cannot find stats for $1" + fi +} + +# by default, all stats and histograms for all pools +log_must zpool_influxdb > $tmpfile + +STATS=" +zpool_io_size +zpool_latency +zpool_stats +zpool_vdev_queue +zpool_vdev_stats +" +for stat in $STATS; do + check_for $stat +done + +# scan stats aren't expected to be there until after a scan has started +zpool scrub $TESTPOOL +zpool_influxdb > $tmpfile +check_for zpool_scan_stats + +log_pass "zpool_influxdb gathers statistics"