mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-11-17 01:51:00 +03:00
7ddc1f737f
There's no good way to tell when a ZIL commit fails and falls back to a transaction sync, other than perhaps a throughput drop. This adds counters so we can see when it happens and why. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov>
555 lines
16 KiB
Plaintext
Executable File
555 lines
16 KiB
Plaintext
Executable File
#!/usr/bin/env @PYTHON_SHEBANG@
|
|
#
|
|
# Print out statistics for all zil stats. This information is
|
|
# available through the zil kstat.
|
|
#
|
|
# CDDL HEADER START
|
|
#
|
|
# The contents of this file are subject to the terms of the
|
|
# Common Development and Distribution License, Version 1.0 only
|
|
# (the "License"). You may not use this file except in compliance
|
|
# with the License.
|
|
#
|
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
# or https://opensource.org/licenses/CDDL-1.0.
|
|
# See the License for the specific language governing permissions
|
|
# and limitations under the License.
|
|
#
|
|
# When distributing Covered Code, include this CDDL HEADER in each
|
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
# If applicable, add the following below this CDDL HEADER, with the
|
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
|
#
|
|
# This script must remain compatible with Python 3.6+.
|
|
#
|
|
|
|
import sys
|
|
import subprocess
|
|
import time
|
|
import copy
|
|
import os
|
|
import re
|
|
import signal
|
|
from collections import defaultdict
|
|
import argparse
|
|
from argparse import RawTextHelpFormatter
|
|
|
|
cols = {
|
|
# hdr: [size, scale, kstat name]
|
|
"time": [8, -1, "time"],
|
|
"pool": [12, -1, "pool"],
|
|
"ds": [12, -1, "dataset_name"],
|
|
"obj": [12, -1, "objset"],
|
|
"cc": [5, 1000, "zil_commit_count"],
|
|
"cwc": [5, 1000, "zil_commit_writer_count"],
|
|
"cec": [5, 1000, "zil_commit_error_count"],
|
|
"csc": [5, 1000, "zil_commit_stall_count"],
|
|
"cSc": [5, 1000, "zil_commit_suspend_count"],
|
|
"ic": [5, 1000, "zil_itx_count"],
|
|
"iic": [5, 1000, "zil_itx_indirect_count"],
|
|
"iib": [5, 1024, "zil_itx_indirect_bytes"],
|
|
"icc": [5, 1000, "zil_itx_copied_count"],
|
|
"icb": [5, 1024, "zil_itx_copied_bytes"],
|
|
"inc": [5, 1000, "zil_itx_needcopy_count"],
|
|
"inb": [5, 1024, "zil_itx_needcopy_bytes"],
|
|
"idc": [5, 1000, "icc+inc"],
|
|
"idb": [5, 1024, "icb+inb"],
|
|
"iwc": [5, 1000, "iic+idc"],
|
|
"iwb": [5, 1024, "iib+idb"],
|
|
"imnc": [6, 1000, "zil_itx_metaslab_normal_count"],
|
|
"imnb": [6, 1024, "zil_itx_metaslab_normal_bytes"],
|
|
"imnw": [6, 1024, "zil_itx_metaslab_normal_write"],
|
|
"imna": [6, 1024, "zil_itx_metaslab_normal_alloc"],
|
|
"imsc": [6, 1000, "zil_itx_metaslab_slog_count"],
|
|
"imsb": [6, 1024, "zil_itx_metaslab_slog_bytes"],
|
|
"imsw": [6, 1024, "zil_itx_metaslab_slog_write"],
|
|
"imsa": [6, 1024, "zil_itx_metaslab_slog_alloc"],
|
|
"imc": [5, 1000, "imnc+imsc"],
|
|
"imb": [5, 1024, "imnb+imsb"],
|
|
"imw": [5, 1024, "imnw+imsw"],
|
|
"ima": [5, 1024, "imna+imsa"],
|
|
"se%": [3, 100, "imb/ima"],
|
|
"sen%": [4, 100, "imnb/imna"],
|
|
"ses%": [4, 100, "imsb/imsa"],
|
|
"te%": [3, 100, "imb/imw"],
|
|
"ten%": [4, 100, "imnb/imnw"],
|
|
"tes%": [4, 100, "imsb/imsw"],
|
|
}
|
|
|
|
hdr = ["time", "ds", "cc", "ic", "idc", "idb", "iic", "iib",
|
|
"imnc", "imnw", "imsc", "imsw"]
|
|
|
|
ghdr = ["time", "cc", "ic", "idc", "idb", "iic", "iib",
|
|
"imnc", "imnw", "imsc", "imsw"]
|
|
|
|
cmd = ("Usage: zilstat [-hgdv] [-i interval] [-p pool_name]")
|
|
|
|
curr = {}
|
|
diff = {}
|
|
kstat = {}
|
|
ds_pairs = {}
|
|
pool_name = None
|
|
dataset_name = None
|
|
interval = 0
|
|
sep = " "
|
|
gFlag = True
|
|
dsFlag = False
|
|
|
|
def prettynum(sz, scale, num=0):
|
|
suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
|
|
index = 0
|
|
save = 0
|
|
|
|
if scale == -1:
|
|
return "%*s" % (sz, num)
|
|
|
|
# Rounding error, return 0
|
|
elif 0 < num < 1:
|
|
num = 0
|
|
|
|
while num > scale and index < 5:
|
|
save = num
|
|
num = num / scale
|
|
index += 1
|
|
|
|
if index == 0:
|
|
return "%*d" % (sz, num)
|
|
|
|
if (save / scale) < 10:
|
|
return "%*.1f%s" % (sz - 1, num, suffix[index])
|
|
else:
|
|
return "%*d%s" % (sz - 1, num, suffix[index])
|
|
|
|
def print_header():
|
|
global hdr
|
|
global sep
|
|
for col in hdr:
|
|
new_col = col
|
|
if interval > 0 and cols[col][1] > 100:
|
|
new_col += "/s"
|
|
sys.stdout.write("%*s%s" % (cols[col][0], new_col, sep))
|
|
sys.stdout.write("\n")
|
|
|
|
def print_values(v):
|
|
global hdr
|
|
global sep
|
|
for col in hdr:
|
|
val = v[cols[col][2]]
|
|
if interval > 0 and cols[col][1] > 100:
|
|
val = v[cols[col][2]] // interval
|
|
sys.stdout.write("%s%s" % (
|
|
prettynum(cols[col][0], cols[col][1], val), sep))
|
|
sys.stdout.write("\n")
|
|
|
|
def print_dict(d):
|
|
for pool in d:
|
|
for objset in d[pool]:
|
|
print_values(d[pool][objset])
|
|
|
|
def detailed_usage():
|
|
sys.stderr.write("%s\n" % cmd)
|
|
sys.stderr.write("Field definitions are as follows:\n")
|
|
for key in cols:
|
|
sys.stderr.write("%11s : %s\n" % (key, cols[key][2]))
|
|
sys.stderr.write("\n")
|
|
sys.exit(0)
|
|
|
|
def init():
|
|
global pool_name
|
|
global dataset_name
|
|
global interval
|
|
global hdr
|
|
global curr
|
|
global gFlag
|
|
global sep
|
|
|
|
curr = dict()
|
|
|
|
parser = argparse.ArgumentParser(description='Program to print zilstats',
|
|
add_help=True,
|
|
formatter_class=RawTextHelpFormatter,
|
|
epilog="\nUsage Examples\n"\
|
|
"Note: Global zilstats is shown by default,"\
|
|
" if none of a|p|d option is not provided\n"\
|
|
"\tzilstat -a\n"\
|
|
'\tzilstat -v\n'\
|
|
'\tzilstat -p tank\n'\
|
|
'\tzilstat -d tank/d1,tank/d2,tank/zv1\n'\
|
|
'\tzilstat -i 1\n'\
|
|
'\tzilstat -s \"***\"\n'\
|
|
'\tzilstat -f zcwc,zimnb,zimsb\n')
|
|
|
|
parser.add_argument(
|
|
"-v", "--verbose",
|
|
action="store_true",
|
|
help="List field headers and definitions"
|
|
)
|
|
|
|
pool_grp = parser.add_mutually_exclusive_group()
|
|
|
|
pool_grp.add_argument(
|
|
"-a", "--all",
|
|
action="store_true",
|
|
dest="all",
|
|
help="Print all dataset stats"
|
|
)
|
|
|
|
pool_grp.add_argument(
|
|
"-p", "--pool",
|
|
type=str,
|
|
help="Print stats for all datasets of a speicfied pool"
|
|
)
|
|
|
|
pool_grp.add_argument(
|
|
"-d", "--dataset",
|
|
type=str,
|
|
help="Print given dataset(s) (Comma separated)"
|
|
)
|
|
|
|
parser.add_argument(
|
|
"-f", "--columns",
|
|
type=str,
|
|
help="Specify specific fields to print (see -v)"
|
|
)
|
|
|
|
parser.add_argument(
|
|
"-s", "--separator",
|
|
type=str,
|
|
help="Override default field separator with custom "
|
|
"character or string"
|
|
)
|
|
|
|
parser.add_argument(
|
|
"-i", "--interval",
|
|
type=int,
|
|
dest="interval",
|
|
help="Print stats between specified interval"
|
|
" (in seconds)"
|
|
)
|
|
|
|
parsed_args = parser.parse_args()
|
|
|
|
if parsed_args.verbose:
|
|
detailed_usage()
|
|
|
|
if parsed_args.all:
|
|
gFlag = False
|
|
|
|
if parsed_args.interval:
|
|
interval = parsed_args.interval
|
|
|
|
if parsed_args.pool:
|
|
pool_name = parsed_args.pool
|
|
gFlag = False
|
|
|
|
if parsed_args.dataset:
|
|
dataset_name = parsed_args.dataset
|
|
gFlag = False
|
|
|
|
if parsed_args.separator:
|
|
sep = parsed_args.separator
|
|
|
|
if gFlag:
|
|
hdr = ghdr
|
|
|
|
if parsed_args.columns:
|
|
hdr = parsed_args.columns.split(",")
|
|
|
|
invalid = []
|
|
for ele in hdr:
|
|
if ele not in cols:
|
|
invalid.append(ele)
|
|
|
|
if len(invalid) > 0:
|
|
sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
|
|
sys.exit(1)
|
|
|
|
if pool_name and dataset_name:
|
|
print ("Error: Can not filter both dataset and pool")
|
|
sys.exit(1)
|
|
|
|
def FileCheck(fname):
|
|
try:
|
|
return (open(fname))
|
|
except IOError:
|
|
print ("Unable to open zilstat proc file: " + fname)
|
|
sys.exit(1)
|
|
|
|
if sys.platform.startswith('freebsd'):
|
|
# Requires py-sysctl on FreeBSD
|
|
import sysctl
|
|
|
|
def kstat_update(pool = None, objid = None):
|
|
global kstat
|
|
kstat = {}
|
|
if not pool:
|
|
file = "kstat.zfs.misc.zil"
|
|
k = [ctl for ctl in sysctl.filter(file) \
|
|
if ctl.type != sysctl.CTLTYPE_NODE]
|
|
kstat_process_str(k, file, "GLOBAL", len(file + "."))
|
|
elif objid:
|
|
file = "kstat.zfs." + pool + ".dataset.objset-" + objid
|
|
k = [ctl for ctl in sysctl.filter(file) if ctl.type \
|
|
!= sysctl.CTLTYPE_NODE]
|
|
kstat_process_str(k, file, objid, len(file + "."))
|
|
else:
|
|
file = "kstat.zfs." + pool + ".dataset"
|
|
zil_start = len(file + ".")
|
|
obj_start = len("kstat.zfs." + pool + ".")
|
|
k = [ctl for ctl in sysctl.filter(file)
|
|
if ctl.type != sysctl.CTLTYPE_NODE]
|
|
for s in k:
|
|
if not s or (s.name.find("zil") == -1 and \
|
|
s.name.find("dataset_name") == -1):
|
|
continue
|
|
name, value = s.name, s.value
|
|
objid = re.findall(r'0x[0-9A-F]+', \
|
|
name[obj_start:], re.I)[0]
|
|
if objid not in kstat:
|
|
kstat[objid] = dict()
|
|
zil_start = len(file + ".objset-" + \
|
|
objid + ".")
|
|
kstat[objid][name[zil_start:]] = value \
|
|
if (name.find("dataset_name")) \
|
|
else int(value)
|
|
|
|
def kstat_process_str(k, file, objset = "GLOBAL", zil_start = 0):
|
|
global kstat
|
|
if not k:
|
|
print("Unable to process kstat for: " + file)
|
|
sys.exit(1)
|
|
kstat[objset] = dict()
|
|
for s in k:
|
|
if not s or (s.name.find("zil") == -1 and \
|
|
s.name.find("dataset_name") == -1):
|
|
continue
|
|
name, value = s.name, s.value
|
|
kstat[objset][name[zil_start:]] = value \
|
|
if (name.find("dataset_name")) else int(value)
|
|
|
|
elif sys.platform.startswith('linux'):
|
|
def kstat_update(pool = None, objid = None):
|
|
global kstat
|
|
kstat = {}
|
|
if not pool:
|
|
k = [line.strip() for line in \
|
|
FileCheck("/proc/spl/kstat/zfs/zil")]
|
|
kstat_process_str(k, "/proc/spl/kstat/zfs/zil")
|
|
elif objid:
|
|
file = "/proc/spl/kstat/zfs/" + pool + "/objset-" + objid
|
|
k = [line.strip() for line in FileCheck(file)]
|
|
kstat_process_str(k, file, objid)
|
|
else:
|
|
if not os.path.exists(f"/proc/spl/kstat/zfs/{pool}"):
|
|
print("Pool \"" + pool + "\" does not exist, Exitting")
|
|
sys.exit(1)
|
|
objsets = os.listdir(f'/proc/spl/kstat/zfs/{pool}')
|
|
for objid in objsets:
|
|
if objid.find("objset-") == -1:
|
|
continue
|
|
file = "/proc/spl/kstat/zfs/" + pool + "/" + objid
|
|
k = [line.strip() for line in FileCheck(file)]
|
|
kstat_process_str(k, file, objid.replace("objset-", ""))
|
|
|
|
def kstat_process_str(k, file, objset = "GLOBAL", zil_start = 0):
|
|
global kstat
|
|
if not k:
|
|
print("Unable to process kstat for: " + file)
|
|
sys.exit(1)
|
|
|
|
kstat[objset] = dict()
|
|
for s in k:
|
|
if not s or (s.find("zil") == -1 and \
|
|
s.find("dataset_name") == -1):
|
|
continue
|
|
name, unused, value = s.split()
|
|
kstat[objset][name] = value \
|
|
if (name == "dataset_name") else int(value)
|
|
|
|
def zil_process_kstat():
|
|
global curr, pool_name, dataset_name, dsFlag, ds_pairs
|
|
curr.clear()
|
|
if gFlag == True:
|
|
kstat_update()
|
|
zil_build_dict()
|
|
else:
|
|
if pool_name:
|
|
kstat_update(pool_name)
|
|
zil_build_dict(pool_name)
|
|
elif dataset_name:
|
|
if dsFlag == False:
|
|
dsFlag = True
|
|
datasets = dataset_name.split(',')
|
|
ds_pairs = defaultdict(list)
|
|
for ds in datasets:
|
|
try:
|
|
objid = subprocess.check_output(['zfs',
|
|
'list', '-Hpo', 'objsetid', ds], \
|
|
stderr=subprocess.DEVNULL) \
|
|
.decode('utf-8').strip()
|
|
except subprocess.CalledProcessError as e:
|
|
print("Command: \"zfs list -Hpo objset "\
|
|
+ str(ds) + "\" failed with error code:"\
|
|
+ str(e.returncode))
|
|
print("Please make sure that dataset \""\
|
|
+ str(ds) + "\" exists")
|
|
sys.exit(1)
|
|
if not objid:
|
|
continue
|
|
ds_pairs[ds.split('/')[0]]. \
|
|
append(hex(int(objid)))
|
|
for pool, objids in ds_pairs.items():
|
|
for objid in objids:
|
|
kstat_update(pool, objid)
|
|
zil_build_dict(pool)
|
|
else:
|
|
try:
|
|
pools = subprocess.check_output(['zpool', 'list', '-Hpo',\
|
|
'name']).decode('utf-8').split()
|
|
except subprocess.CalledProcessError as e:
|
|
print("Command: \"zpool list -Hpo name\" failed with error"\
|
|
"code: " + str(e.returncode))
|
|
sys.exit(1)
|
|
for pool in pools:
|
|
kstat_update(pool)
|
|
zil_build_dict(pool)
|
|
|
|
def calculate_diff():
|
|
global curr, diff
|
|
prev = copy.deepcopy(curr)
|
|
zil_process_kstat()
|
|
diff = copy.deepcopy(curr)
|
|
for pool in curr:
|
|
for objset in curr[pool]:
|
|
for key in curr[pool][objset]:
|
|
if not isinstance(diff[pool][objset][key], int):
|
|
continue
|
|
# If prev is NULL, this is the
|
|
# first time we are here
|
|
if not prev:
|
|
diff[pool][objset][key] = 0
|
|
else:
|
|
diff[pool][objset][key] \
|
|
= curr[pool][objset][key] \
|
|
- prev[pool][objset][key]
|
|
|
|
def zil_build_dict(pool = "GLOBAL"):
|
|
global kstat
|
|
for objset in kstat:
|
|
for key in kstat[objset]:
|
|
val = kstat[objset][key]
|
|
if pool not in curr:
|
|
curr[pool] = dict()
|
|
if objset not in curr[pool]:
|
|
curr[pool][objset] = dict()
|
|
curr[pool][objset][key] = val
|
|
|
|
def zil_extend_dict():
|
|
global diff
|
|
for pool in diff:
|
|
for objset in diff[pool]:
|
|
diff[pool][objset]["pool"] = pool
|
|
diff[pool][objset]["objset"] = objset
|
|
diff[pool][objset]["time"] = time.strftime("%H:%M:%S", \
|
|
time.localtime())
|
|
diff[pool][objset]["icc+inc"] = \
|
|
diff[pool][objset]["zil_itx_copied_count"] + \
|
|
diff[pool][objset]["zil_itx_needcopy_count"]
|
|
diff[pool][objset]["icb+inb"] = \
|
|
diff[pool][objset]["zil_itx_copied_bytes"] + \
|
|
diff[pool][objset]["zil_itx_needcopy_bytes"]
|
|
diff[pool][objset]["iic+idc"] = \
|
|
diff[pool][objset]["zil_itx_indirect_count"] + \
|
|
diff[pool][objset]["zil_itx_copied_count"] + \
|
|
diff[pool][objset]["zil_itx_needcopy_count"]
|
|
diff[pool][objset]["iib+idb"] = \
|
|
diff[pool][objset]["zil_itx_indirect_bytes"] + \
|
|
diff[pool][objset]["zil_itx_copied_bytes"] + \
|
|
diff[pool][objset]["zil_itx_needcopy_bytes"]
|
|
diff[pool][objset]["imnc+imsc"] = \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_count"] + \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_count"]
|
|
diff[pool][objset]["imnb+imsb"] = \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] + \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_bytes"]
|
|
diff[pool][objset]["imnw+imsw"] = \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_write"] + \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_write"]
|
|
diff[pool][objset]["imna+imsa"] = \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_alloc"] + \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
|
|
if diff[pool][objset]["imna+imsa"] > 0:
|
|
diff[pool][objset]["imb/ima"] = 100 * \
|
|
diff[pool][objset]["imnb+imsb"] // \
|
|
diff[pool][objset]["imna+imsa"]
|
|
else:
|
|
diff[pool][objset]["imb/ima"] = 100
|
|
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
|
|
diff[pool][objset]["imnb/imna"] = 100 * \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_alloc"]
|
|
else:
|
|
diff[pool][objset]["imnb/imna"] = 100
|
|
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
|
|
diff[pool][objset]["imsb/imsa"] = 100 * \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
|
|
else:
|
|
diff[pool][objset]["imsb/imsa"] = 100
|
|
if diff[pool][objset]["imnw+imsw"] > 0:
|
|
diff[pool][objset]["imb/imw"] = 100 * \
|
|
diff[pool][objset]["imnb+imsb"] // \
|
|
diff[pool][objset]["imnw+imsw"]
|
|
else:
|
|
diff[pool][objset]["imb/imw"] = 100
|
|
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
|
|
diff[pool][objset]["imnb/imnw"] = 100 * \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
|
|
diff[pool][objset]["zil_itx_metaslab_normal_write"]
|
|
else:
|
|
diff[pool][objset]["imnb/imnw"] = 100
|
|
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
|
|
diff[pool][objset]["imsb/imsw"] = 100 * \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
|
|
diff[pool][objset]["zil_itx_metaslab_slog_write"]
|
|
else:
|
|
diff[pool][objset]["imsb/imsw"] = 100
|
|
|
|
def sign_handler_epipe(sig, frame):
|
|
print("Caught EPIPE signal: " + str(frame))
|
|
print("Exitting...")
|
|
sys.exit(0)
|
|
|
|
def main():
|
|
global interval
|
|
global curr, diff
|
|
hprint = False
|
|
init()
|
|
signal.signal(signal.SIGINT, signal.SIG_DFL)
|
|
signal.signal(signal.SIGPIPE, sign_handler_epipe)
|
|
|
|
zil_process_kstat()
|
|
if not curr:
|
|
print ("Error: No stats to show")
|
|
sys.exit(0)
|
|
print_header()
|
|
if interval > 0:
|
|
time.sleep(interval)
|
|
while True:
|
|
calculate_diff()
|
|
if not diff:
|
|
print ("Error: No stats to show")
|
|
sys.exit(0)
|
|
zil_extend_dict()
|
|
print_dict(diff)
|
|
time.sleep(interval)
|
|
else:
|
|
diff = curr
|
|
zil_extend_dict()
|
|
print_dict(diff)
|
|
|
|
if __name__ == '__main__':
|
|
main()
|
|
|