diff --git a/configure.ac b/configure.ac index 18d91b359..301258e7f 100644 --- a/configure.ac +++ b/configure.ac @@ -283,7 +283,6 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/inheritance/Makefile tests/zfs-tests/tests/functional/inuse/Makefile tests/zfs-tests/tests/functional/io/Makefile - tests/zfs-tests/tests/functional/kstat/Makefile tests/zfs-tests/tests/functional/large_files/Makefile tests/zfs-tests/tests/functional/largest_pool/Makefile tests/zfs-tests/tests/functional/link_count/Makefile @@ -301,6 +300,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/pool_checkpoint/Makefile tests/zfs-tests/tests/functional/poolversion/Makefile tests/zfs-tests/tests/functional/privilege/Makefile + tests/zfs-tests/tests/functional/procfs/Makefile tests/zfs-tests/tests/functional/projectquota/Makefile tests/zfs-tests/tests/functional/pyzfs/Makefile tests/zfs-tests/tests/functional/quota/Makefile diff --git a/include/spl/sys/Makefile.am b/include/spl/sys/Makefile.am index d58ed0e20..e596ff373 100644 --- a/include/spl/sys/Makefile.am +++ b/include/spl/sys/Makefile.am @@ -28,6 +28,7 @@ KERNEL_H = \ $(top_srcdir)/include/spl/sys/param.h \ $(top_srcdir)/include/spl/sys/processor.h \ $(top_srcdir)/include/spl/sys/proc.h \ + $(top_srcdir)/include/spl/sys/procfs_list.h \ $(top_srcdir)/include/spl/sys/random.h \ $(top_srcdir)/include/spl/sys/rwlock.h \ $(top_srcdir)/include/spl/sys/shrinker.h \ diff --git a/include/spl/sys/kstat.h b/include/spl/sys/kstat.h index f197ce455..53274d8f5 100644 --- a/include/spl/sys/kstat.h +++ b/include/spl/sys/kstat.h @@ -98,30 +98,34 @@ typedef struct kstat_raw_ops { void *(*addr)(kstat_t *ksp, loff_t index); } kstat_raw_ops_t; +typedef struct kstat_proc_entry { + char kpe_name[KSTAT_STRLEN+1]; /* kstat name */ + char kpe_module[KSTAT_STRLEN+1]; /* provider module name */ + kstat_module_t *kpe_owner; /* kstat module linkage */ + struct list_head kpe_list; /* kstat linkage */ + struct proc_dir_entry *kpe_proc; /* procfs entry */ +} 
kstat_proc_entry_t; + struct kstat_s { int ks_magic; /* magic value */ kid_t ks_kid; /* unique kstat ID */ hrtime_t ks_crtime; /* creation time */ hrtime_t ks_snaptime; /* last access time */ - char ks_module[KSTAT_STRLEN+1]; /* provider module name */ int ks_instance; /* provider module instance */ - char ks_name[KSTAT_STRLEN+1]; /* kstat name */ char ks_class[KSTAT_STRLEN+1]; /* kstat class */ uchar_t ks_type; /* kstat data type */ uchar_t ks_flags; /* kstat flags */ void *ks_data; /* kstat type-specific data */ uint_t ks_ndata; /* # of data records */ size_t ks_data_size; /* size of kstat data section */ - struct proc_dir_entry *ks_proc; /* proc linkage */ kstat_update_t *ks_update; /* dynamic updates */ void *ks_private; /* private data */ kmutex_t ks_private_lock; /* kstat private data lock */ kmutex_t *ks_lock; /* kstat data lock */ - struct list_head ks_list; /* kstat linkage */ - kstat_module_t *ks_owner; /* kstat module linkage */ kstat_raw_ops_t ks_raw_ops; /* ops table for raw type */ char *ks_raw_buf; /* buf used for raw ops */ size_t ks_raw_bufsize; /* size of raw ops buffer */ + kstat_proc_entry_t ks_proc; /* data for procfs entry */ }; typedef struct kstat_named_s { @@ -189,6 +193,12 @@ extern kstat_t *__kstat_create(const char *ks_module, int ks_instance, const char *ks_name, const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags); +extern void kstat_proc_entry_init(kstat_proc_entry_t *kpep, + const char *module, const char *name); +extern void kstat_proc_entry_delete(kstat_proc_entry_t *kpep); +extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep, + const struct file_operations *file_ops, void *data); + extern void __kstat_install(kstat_t *ksp); extern void __kstat_delete(kstat_t *ksp); extern void kstat_waitq_enter(kstat_io_t *); diff --git a/include/spl/sys/procfs_list.h b/include/spl/sys/procfs_list.h new file mode 100644 index 000000000..cbcb4bcff --- /dev/null +++ b/include/spl/sys/procfs_list.h @@ -0,0 +1,71 @@ +/* 
+ * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2018 by Delphix. All rights reserved. + */ + +#ifndef _SPL_PROCFS_LIST_H +#define _SPL_PROCFS_LIST_H + +#include +#include +#include +#include + +typedef struct procfs_list procfs_list_t; +struct procfs_list { + /* Accessed only by user of a procfs_list */ + void *pl_private; + + /* + * Accessed both by user of a procfs_list and by procfs_list + * implementation + */ + kmutex_t pl_lock; + list_t pl_list; + + /* Accessed only by procfs_list implementation */ + uint64_t pl_next_id; + int (*pl_show)(struct seq_file *f, void *p); + int (*pl_show_header)(struct seq_file *f); + int (*pl_clear)(procfs_list_t *procfs_list); + size_t pl_node_offset; + kstat_proc_entry_t pl_kstat_entry; +}; + +typedef struct procfs_list_node { + list_node_t pln_link; + uint64_t pln_id; +} procfs_list_node_t; + +void procfs_list_install(const char *module, + const char *name, + procfs_list_t *procfs_list, + int (*show)(struct seq_file *f, void *p), + int (*show_header)(struct seq_file *f), + int (*clear)(procfs_list_t *procfs_list), + size_t procfs_list_node_off); +void procfs_list_uninstall(procfs_list_t *procfs_list); +void 
procfs_list_destroy(procfs_list_t *procfs_list); + +void procfs_list_add(procfs_list_t *procfs_list, void *p); + +#endif /* _SPL_PROCFS_LIST_H */ diff --git a/include/sys/spa.h b/include/sys/spa.h index b86c65557..443d835a1 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -863,22 +863,27 @@ extern boolean_t spa_refcount_zero(spa_t *spa); #define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO) /* Historical pool statistics */ -typedef struct spa_stats_history { +typedef struct spa_history_kstat { kmutex_t lock; uint64_t count; uint64_t size; kstat_t *kstat; void *private; list_t list; -} spa_stats_history_t; +} spa_history_kstat_t; + +typedef struct spa_history_list { + uint64_t size; + procfs_list_t procfs_list; +} spa_history_list_t; typedef struct spa_stats { - spa_stats_history_t read_history; - spa_stats_history_t txg_history; - spa_stats_history_t tx_assign_histogram; - spa_stats_history_t io_history; - spa_stats_history_t mmp_history; - spa_stats_history_t state; /* pool state */ + spa_history_list_t read_history; + spa_history_list_t txg_history; + spa_history_kstat_t tx_assign_histogram; + spa_history_kstat_t io_history; + spa_history_list_t mmp_history; + spa_history_kstat_t state; /* pool state */ } spa_stats_t; typedef enum txg_state { @@ -911,7 +916,7 @@ extern void spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs); extern int spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id); extern int spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error, hrtime_t duration); -extern void *spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, +extern void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id, int error); diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 6f502897e..11c048c23 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -62,6 +62,7 @@ #include #include #include +#include #include 
#include @@ -351,6 +352,37 @@ extern void kstat_set_raw_ops(kstat_t *ksp, int (*data)(char *buf, size_t size, void *data), void *(*addr)(kstat_t *ksp, loff_t index)); +/* + * procfs list manipulation + */ + +struct seq_file { }; +void seq_printf(struct seq_file *m, const char *fmt, ...); + +typedef struct procfs_list { + void *pl_private; + kmutex_t pl_lock; + list_t pl_list; + uint64_t pl_next_id; + size_t pl_node_offset; +} procfs_list_t; + +typedef struct procfs_list_node { + list_node_t pln_link; + uint64_t pln_id; +} procfs_list_node_t; + +void procfs_list_install(const char *module, + const char *name, + procfs_list_t *procfs_list, + int (*show)(struct seq_file *f, void *p), + int (*show_header)(struct seq_file *f), + int (*clear)(procfs_list_t *procfs_list), + size_t procfs_list_node_off); +void procfs_list_uninstall(procfs_list_t *procfs_list); +void procfs_list_destroy(procfs_list_t *procfs_list); +void procfs_list_add(procfs_list_t *procfs_list, void *p); + /* * Kernel memory */ diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h index aa9bfe21f..f3a936ae7 100644 --- a/include/sys/zfs_debug.h +++ b/include/sys/zfs_debug.h @@ -76,13 +76,6 @@ extern void __dprintf(const char *file, const char *func, extern void zfs_panic_recover(const char *fmt, ...); -typedef struct zfs_dbgmsg { - list_node_t zdm_node; - time_t zdm_timestamp; - int zdm_size; - char zdm_msg[1]; /* variable length allocation */ -} zfs_dbgmsg_t; - extern void zfs_dbgmsg_init(void); extern void zfs_dbgmsg_fini(void); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 341548ac3..5baf52514 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -424,6 +424,57 @@ cv_broadcast(kcondvar_t *cv) VERIFY0(pthread_cond_broadcast(cv)); } +/* + * ========================================================================= + * procfs list + * ========================================================================= + */ + +void +seq_printf(struct seq_file *m, const char 
*fmt, ...) +{} + +void +procfs_list_install(const char *module, + const char *name, + procfs_list_t *procfs_list, + int (*show)(struct seq_file *f, void *p), + int (*show_header)(struct seq_file *f), + int (*clear)(procfs_list_t *procfs_list), + size_t procfs_list_node_off) +{ + mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&procfs_list->pl_list, + procfs_list_node_off + sizeof (procfs_list_node_t), + procfs_list_node_off + offsetof(procfs_list_node_t, pln_link)); + procfs_list->pl_next_id = 1; + procfs_list->pl_node_offset = procfs_list_node_off; +} + +void +procfs_list_uninstall(procfs_list_t *procfs_list) +{} + +void +procfs_list_destroy(procfs_list_t *procfs_list) +{ + ASSERT(list_is_empty(&procfs_list->pl_list)); + list_destroy(&procfs_list->pl_list); + mutex_destroy(&procfs_list->pl_lock); +} + +#define NODE_ID(procfs_list, obj) \ + (((procfs_list_node_t *)(((char *)obj) + \ + (procfs_list)->pl_node_offset))->pln_id) + +void +procfs_list_add(procfs_list_t *procfs_list, void *p) +{ + ASSERT(MUTEX_HELD(&procfs_list->pl_lock)); + NODE_ID(procfs_list, p) = procfs_list->pl_next_id++; + list_insert_tail(&procfs_list->pl_list, p); +} + /* * ========================================================================= * vnode operations diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in index 97a431f22..3bcbf63cb 100644 --- a/module/spl/Makefile.in +++ b/module/spl/Makefile.in @@ -18,6 +18,7 @@ $(MODULE)-objs += spl-kobj.o $(MODULE)-objs += spl-kstat.o $(MODULE)-objs += spl-mutex.o $(MODULE)-objs += spl-proc.o +$(MODULE)-objs += spl-procfs-list.o $(MODULE)-objs += spl-rwlock.o $(MODULE)-objs += spl-taskq.o $(MODULE)-objs += spl-thread.o diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c index c3fc2e4b2..8683693c8 100644 --- a/module/spl/spl-kstat.c +++ b/module/spl/spl-kstat.c @@ -530,6 +530,18 @@ __kstat_set_raw_ops(kstat_t *ksp, } EXPORT_SYMBOL(__kstat_set_raw_ops); +void +kstat_proc_entry_init(kstat_proc_entry_t *kpep, 
const char *module, + const char *name) +{ + kpep->kpe_owner = NULL; + kpep->kpe_proc = NULL; + INIT_LIST_HEAD(&kpep->kpe_list); + strlcpy(kpep->kpe_module, module, sizeof (kpep->kpe_module)); + strlcpy(kpep->kpe_name, name, sizeof (kpep->kpe_name)); +} +EXPORT_SYMBOL(kstat_proc_entry_init); + kstat_t * __kstat_create(const char *ks_module, int ks_instance, const char *ks_name, const char *ks_class, uchar_t ks_type, uint_t ks_ndata, @@ -556,13 +568,10 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name, ksp->ks_magic = KS_MAGIC; mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL); ksp->ks_lock = &ksp->ks_private_lock; - INIT_LIST_HEAD(&ksp->ks_list); ksp->ks_crtime = gethrtime(); ksp->ks_snaptime = ksp->ks_crtime; - strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN); ksp->ks_instance = ks_instance; - strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN); strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN); ksp->ks_type = ks_type; ksp->ks_flags = ks_flags; @@ -573,6 +582,7 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name, ksp->ks_raw_ops.addr = NULL; ksp->ks_raw_buf = NULL; ksp->ks_raw_bufsize = 0; + kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name); switch (ksp->ks_type) { case KSTAT_TYPE_RAW: @@ -614,14 +624,14 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name, EXPORT_SYMBOL(__kstat_create); static int -kstat_detect_collision(kstat_t *ksp) +kstat_detect_collision(kstat_proc_entry_t *kpep) { kstat_module_t *module; - kstat_t *tmp; + kstat_proc_entry_t *tmp; char *parent; char *cp; - parent = kmem_asprintf("%s", ksp->ks_module); + parent = kmem_asprintf("%s", kpep->kpe_module); if ((cp = strrchr(parent, '/')) == NULL) { strfree(parent); @@ -630,8 +640,8 @@ kstat_detect_collision(kstat_t *ksp) cp[0] = '\0'; if ((module = kstat_find_module(parent)) != NULL) { - list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) { - if (strncmp(tmp->ks_name, cp+1, KSTAT_STRLEN) == 0) { + list_for_each_entry(tmp, 
&module->ksm_kstat_list, kpe_list) { + if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) { strfree(parent); return (EEXIST); } @@ -642,24 +652,30 @@ kstat_detect_collision(kstat_t *ksp) return (0); } +/* + * Add a file to the proc filesystem under the kstat namespace (i.e. + * /proc/spl/kstat/). The file need not necessarily be implemented as a + * kstat. + */ void -__kstat_install(kstat_t *ksp) +kstat_proc_entry_install(kstat_proc_entry_t *kpep, + const struct file_operations *file_ops, void *data) { kstat_module_t *module; - kstat_t *tmp; + kstat_proc_entry_t *tmp; - ASSERT(ksp); + ASSERT(kpep); mutex_enter(&kstat_module_lock); - module = kstat_find_module(ksp->ks_module); + module = kstat_find_module(kpep->kpe_module); if (module == NULL) { - if (kstat_detect_collision(ksp) != 0) { + if (kstat_detect_collision(kpep) != 0) { cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \ - " collision", ksp->ks_module, ksp->ks_name); + " collision", kpep->kpe_module, kpep->kpe_name); goto out; } - module = kstat_create_module(ksp->ks_module); + module = kstat_create_module(kpep->kpe_module); if (module == NULL) goto out; } @@ -668,44 +684,60 @@ __kstat_install(kstat_t *ksp) * Only one entry by this name per-module, on failure the module * shouldn't be deleted because we know it has at least one entry. 
*/ - list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) { - if (strncmp(tmp->ks_name, ksp->ks_name, KSTAT_STRLEN) == 0) + list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) { + if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0) goto out; } - list_add_tail(&ksp->ks_list, &module->ksm_kstat_list); + list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list); - mutex_enter(ksp->ks_lock); - ksp->ks_owner = module; - ksp->ks_proc = proc_create_data(ksp->ks_name, 0644, - module->ksm_proc, &proc_kstat_operations, (void *)ksp); - if (ksp->ks_proc == NULL) { - list_del_init(&ksp->ks_list); + kpep->kpe_owner = module; + kpep->kpe_proc = proc_create_data(kpep->kpe_name, 0644, + module->ksm_proc, file_ops, data); + if (kpep->kpe_proc == NULL) { + list_del_init(&kpep->kpe_list); if (list_empty(&module->ksm_kstat_list)) kstat_delete_module(module); } - mutex_exit(ksp->ks_lock); out: mutex_exit(&kstat_module_lock); + +} +EXPORT_SYMBOL(kstat_proc_entry_install); + +void +__kstat_install(kstat_t *ksp) +{ + ASSERT(ksp); + kstat_proc_entry_install(&ksp->ks_proc, &proc_kstat_operations, ksp); } EXPORT_SYMBOL(__kstat_install); void -__kstat_delete(kstat_t *ksp) +kstat_proc_entry_delete(kstat_proc_entry_t *kpep) { - kstat_module_t *module = ksp->ks_owner; + kstat_module_t *module = kpep->kpe_owner; + if (kpep->kpe_proc) + remove_proc_entry(kpep->kpe_name, module->ksm_proc); mutex_enter(&kstat_module_lock); - list_del_init(&ksp->ks_list); + list_del_init(&kpep->kpe_list); + + /* + * Remove top level module directory if it wasn't empty before, but now + * is. 
+ */ + if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list)) + kstat_delete_module(module); mutex_exit(&kstat_module_lock); - if (ksp->ks_proc) { - remove_proc_entry(ksp->ks_name, module->ksm_proc); +} +EXPORT_SYMBOL(kstat_proc_entry_delete); - /* Remove top level module directory if it's empty */ - if (list_empty(&module->ksm_kstat_list)) - kstat_delete_module(module); - } +void +__kstat_delete(kstat_t *ksp) +{ + kstat_proc_entry_delete(&ksp->ks_proc); if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) kmem_free(ksp->ks_data, ksp->ks_data_size); diff --git a/module/spl/spl-procfs-list.c b/module/spl/spl-procfs-list.c new file mode 100644 index 000000000..4902e0a56 --- /dev/null +++ b/module/spl/spl-procfs-list.c @@ -0,0 +1,256 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2018 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include + +/* + * A procfs_list is a wrapper around a linked list which implements the seq_file + * interface, allowing the contents of the list to be exposed through procfs. 
+ * The kernel already has some utilities to help implement the seq_file + * interface for linked lists (seq_list_*), but they aren't appropriate for use + * with lists that have many entries, because seq_list_start walks the list at + * the start of each read syscall to find where it left off, so reading a file + * ends up being quadratic in the number of entries in the list. + * + * This implementation avoids this penalty by maintaining a separate cursor into + * the list per instance of the file that is open. It also maintains some extra + * information in each node of the list to prevent reads of entries that have + * been dropped from the list. + * + * Callers should only add elements to the list using procfs_list_add, which + * adds an element to the tail of the list. Other operations can be performed + * directly on the wrapped list using the normal list manipulation functions, + * but elements should only be removed from the head of the list. + */ + +#define NODE_ID(procfs_list, obj) \ + (((procfs_list_node_t *)(((char *)obj) + \ + (procfs_list)->pl_node_offset))->pln_id) + +typedef struct procfs_list_cursor { + procfs_list_t *procfs_list; /* List into which this cursor points */ + void *cached_node; /* Most recently accessed node */ + loff_t cached_pos; /* Position of cached_node */ +} procfs_list_cursor_t; + +static int +procfs_list_seq_show(struct seq_file *f, void *p) +{ + procfs_list_cursor_t *cursor = f->private; + procfs_list_t *procfs_list = cursor->procfs_list; + + ASSERT(MUTEX_HELD(&procfs_list->pl_lock)); + if (p == SEQ_START_TOKEN) { + if (procfs_list->pl_show_header != NULL) + return (procfs_list->pl_show_header(f)); + else + return (0); + } + return (procfs_list->pl_show(f, p)); +} + +static void * +procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos) +{ + void *next_node; + procfs_list_t *procfs_list = cursor->procfs_list; + + if (cursor->cached_node == SEQ_START_TOKEN) + next_node = list_head(&procfs_list->pl_list); + else + 
next_node = list_next(&procfs_list->pl_list, + cursor->cached_node); + + if (next_node != NULL) { + cursor->cached_node = next_node; + cursor->cached_pos = NODE_ID(procfs_list, cursor->cached_node); + *pos = cursor->cached_pos; + } + return (next_node); +} + +static void * +procfs_list_seq_start(struct seq_file *f, loff_t *pos) +{ + procfs_list_cursor_t *cursor = f->private; + procfs_list_t *procfs_list = cursor->procfs_list; + + mutex_enter(&procfs_list->pl_lock); + + if (*pos == 0) { + cursor->cached_node = SEQ_START_TOKEN; + cursor->cached_pos = 0; + return (SEQ_START_TOKEN); + } + + /* + * Check if our cached pointer has become stale, which happens if the + * message where we left off has been dropped from the list since + * the last read syscall completed. + */ + void *oldest_node = list_head(&procfs_list->pl_list); + if (cursor->cached_node != SEQ_START_TOKEN && (oldest_node == NULL || + NODE_ID(procfs_list, oldest_node) > cursor->cached_pos)) + return (ERR_PTR(-EIO)); + + /* + * If it isn't starting from the beginning of the file, the seq_file + * code will either pick up at the same position it visited last or the + * following one. 
+ */ + if (*pos == cursor->cached_pos) { + return (cursor->cached_node); + } else { + ASSERT3U(*pos, ==, cursor->cached_pos + 1); + return (procfs_list_next_node(cursor, pos)); + } +} + +static void * +procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + procfs_list_cursor_t *cursor = f->private; + ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock)); + return (procfs_list_next_node(cursor, pos)); +} + +static void +procfs_list_seq_stop(struct seq_file *f, void *p) +{ + procfs_list_cursor_t *cursor = f->private; + procfs_list_t *procfs_list = cursor->procfs_list; + mutex_exit(&procfs_list->pl_lock); +} + +static struct seq_operations procfs_list_seq_ops = { + .show = procfs_list_seq_show, + .start = procfs_list_seq_start, + .next = procfs_list_seq_next, + .stop = procfs_list_seq_stop, +}; + +static int +procfs_list_open(struct inode *inode, struct file *filp) +{ + int rc = seq_open_private(filp, &procfs_list_seq_ops, + sizeof (procfs_list_cursor_t)); + if (rc != 0) + return (rc); + + struct seq_file *f = filp->private_data; + procfs_list_cursor_t *cursor = f->private; + cursor->procfs_list = PDE_DATA(inode); + cursor->cached_node = NULL; + cursor->cached_pos = 0; + + return (0); +} + +static ssize_t +procfs_list_write(struct file *filp, const char __user *buf, size_t len, + loff_t *ppos) +{ + struct seq_file *f = filp->private_data; + procfs_list_cursor_t *cursor = f->private; + procfs_list_t *procfs_list = cursor->procfs_list; + int rc; + + if (procfs_list->pl_clear != NULL && + (rc = procfs_list->pl_clear(procfs_list)) != 0) + return (-rc); + return (len); +} + +static struct file_operations procfs_list_operations = { + .owner = THIS_MODULE, + .open = procfs_list_open, + .write = procfs_list_write, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +/* + * Initialize a procfs_list and create a file for it in the proc filesystem + * under the kstat namespace. 
+ */ +void +procfs_list_install(const char *module, + const char *name, + procfs_list_t *procfs_list, + int (*show)(struct seq_file *f, void *p), + int (*show_header)(struct seq_file *f), + int (*clear)(procfs_list_t *procfs_list), + size_t procfs_list_node_off) +{ + mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&procfs_list->pl_list, + procfs_list_node_off + sizeof (procfs_list_node_t), + procfs_list_node_off + offsetof(procfs_list_node_t, pln_link)); + procfs_list->pl_next_id = 1; /* Save id 0 for SEQ_START_TOKEN */ + procfs_list->pl_show = show; + procfs_list->pl_show_header = show_header; + procfs_list->pl_clear = clear; + procfs_list->pl_node_offset = procfs_list_node_off; + + kstat_proc_entry_init(&procfs_list->pl_kstat_entry, module, name); + kstat_proc_entry_install(&procfs_list->pl_kstat_entry, + &procfs_list_operations, procfs_list); +} +EXPORT_SYMBOL(procfs_list_install); + +/* Remove the proc filesystem file corresponding to the given list */ +void +procfs_list_uninstall(procfs_list_t *procfs_list) +{ + kstat_proc_entry_delete(&procfs_list->pl_kstat_entry); +} +EXPORT_SYMBOL(procfs_list_uninstall); + +void +procfs_list_destroy(procfs_list_t *procfs_list) +{ + ASSERT(list_is_empty(&procfs_list->pl_list)); + list_destroy(&procfs_list->pl_list); + mutex_destroy(&procfs_list->pl_lock); +} +EXPORT_SYMBOL(procfs_list_destroy); + +/* + * Add a new node to the tail of the list. While the standard list manipulation + * functions can be used for all other operations, adding elements to the list + * should only be done using this helper so that the id of the new node is set + * correctly. 
+ */ +void +procfs_list_add(procfs_list_t *procfs_list, void *p) +{ + ASSERT(MUTEX_HELD(&procfs_list->pl_lock)); + NODE_ID(procfs_list, p) = procfs_list->pl_next_id++; + list_insert_tail(&procfs_list->pl_list, p); +} +EXPORT_SYMBOL(procfs_list_add); diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index fa1cf9e98..c02ef86b5 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -55,7 +55,6 @@ int zfs_multihost_history = 0; * Read statistics - Information exported regarding each arc_read call */ typedef struct spa_read_history { - uint64_t uid; /* unique identifier */ hrtime_t start; /* time read completed */ uint64_t objset; /* read from this objset */ uint64_t object; /* read of this object number */ @@ -65,13 +64,13 @@ typedef struct spa_read_history { uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */ pid_t pid; /* PID of task doing read */ char comm[16]; /* process name of task doing read */ - list_node_t srh_link; + procfs_list_node_t srh_node; } spa_read_history_t; static int -spa_read_history_headers(char *buf, size_t size) +spa_read_history_show_header(struct seq_file *f) { - (void) snprintf(buf, size, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s " + seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s " "%-24s %-8s %-16s\n", "UID", "start", "objset", "object", "level", "blkid", "aflags", "origin", "pid", "process"); @@ -79,13 +78,13 @@ spa_read_history_headers(char *buf, size_t size) } static int -spa_read_history_data(char *buf, size_t size, void *data) +spa_read_history_show(struct seq_file *f, void *data) { spa_read_history_t *srh = (spa_read_history_t *)data; - (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx " + seq_printf(f, "%-8llu %-16llu 0x%-6llx " "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n", - (u_longlong_t)srh->uid, srh->start, + (u_longlong_t)srh->srh_node.pln_id, srh->start, (longlong_t)srh->objset, (longlong_t)srh->object, (longlong_t)srh->level, (longlong_t)srh->blkid, srh->aflags, srh->origin, srh->pid, 
srh->comm); @@ -93,120 +92,73 @@ spa_read_history_data(char *buf, size_t size, void *data) return (0); } -/* - * Calculate the address for the next spa_stats_history_t entry. The - * ssh->lock will be held until ksp->ks_ndata entries are processed. - */ -static void * -spa_read_history_addr(kstat_t *ksp, loff_t n) +/* Remove oldest elements from list until there are no more than 'size' left */ +static void +spa_read_history_truncate(spa_history_list_t *shl, unsigned int size) { - spa_t *spa = ksp->ks_private; - spa_stats_history_t *ssh = &spa->spa_stats.read_history; - - ASSERT(MUTEX_HELD(&ssh->lock)); - - if (n == 0) - ssh->private = list_tail(&ssh->list); - else if (ssh->private) - ssh->private = list_prev(&ssh->list, ssh->private); - - return (ssh->private); -} - -/* - * When the kstat is written discard all spa_read_history_t entries. The - * ssh->lock will be held until ksp->ks_ndata entries are processed. - */ -static int -spa_read_history_update(kstat_t *ksp, int rw) -{ - spa_t *spa = ksp->ks_private; - spa_stats_history_t *ssh = &spa->spa_stats.read_history; - - if (rw == KSTAT_WRITE) { - spa_read_history_t *srh; - - while ((srh = list_remove_head(&ssh->list))) { - ssh->size--; - kmem_free(srh, sizeof (spa_read_history_t)); - } - - ASSERT3U(ssh->size, ==, 0); + spa_read_history_t *srh; + while (shl->size > size) { + srh = list_remove_head(&shl->procfs_list.pl_list); + ASSERT3P(srh, !=, NULL); + kmem_free(srh, sizeof (spa_read_history_t)); + shl->size--; } - ksp->ks_ndata = ssh->size; - ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t); + if (size == 0) + ASSERT(list_is_empty(&shl->procfs_list.pl_list)); +} +static int +spa_read_history_clear(procfs_list_t *procfs_list) +{ + spa_history_list_t *shl = procfs_list->pl_private; + mutex_enter(&procfs_list->pl_lock); + spa_read_history_truncate(shl, 0); + mutex_exit(&procfs_list->pl_lock); return (0); } static void spa_read_history_init(spa_t *spa) { - spa_stats_history_t *ssh = 
&spa->spa_stats.read_history; - char *name; - kstat_t *ksp; + spa_history_list_t *shl = &spa->spa_stats.read_history; + char *module; - mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&ssh->list, sizeof (spa_read_history_t), - offsetof(spa_read_history_t, srh_link)); + shl->size = 0; - ssh->count = 0; - ssh->size = 0; - ssh->private = NULL; + module = kmem_asprintf("zfs/%s", spa_name(spa)); - name = kmem_asprintf("zfs/%s", spa_name(spa)); + shl->procfs_list.pl_private = shl; + procfs_list_install(module, + "reads", + &shl->procfs_list, + spa_read_history_show, + spa_read_history_show_header, + spa_read_history_clear, + offsetof(spa_read_history_t, srh_node)); - ksp = kstat_create(name, 0, "reads", "misc", - KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); - ssh->kstat = ksp; - - if (ksp) { - ksp->ks_lock = &ssh->lock; - ksp->ks_data = NULL; - ksp->ks_private = spa; - ksp->ks_update = spa_read_history_update; - kstat_set_raw_ops(ksp, spa_read_history_headers, - spa_read_history_data, spa_read_history_addr); - kstat_install(ksp); - } - strfree(name); + strfree(module); } static void spa_read_history_destroy(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.read_history; - spa_read_history_t *srh; - kstat_t *ksp; - - ksp = ssh->kstat; - if (ksp) - kstat_delete(ksp); - - mutex_enter(&ssh->lock); - while ((srh = list_remove_head(&ssh->list))) { - ssh->size--; - kmem_free(srh, sizeof (spa_read_history_t)); - } - - ASSERT3U(ssh->size, ==, 0); - list_destroy(&ssh->list); - mutex_exit(&ssh->lock); - - mutex_destroy(&ssh->lock); + spa_history_list_t *shl = &spa->spa_stats.read_history; + procfs_list_uninstall(&shl->procfs_list); + spa_read_history_truncate(shl, 0); + procfs_list_destroy(&shl->procfs_list); } void spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags) { - spa_stats_history_t *ssh = &spa->spa_stats.read_history; - spa_read_history_t *srh, *rm; + spa_history_list_t *shl = &spa->spa_stats.read_history; + spa_read_history_t 
*srh; ASSERT3P(spa, !=, NULL); ASSERT3P(zb, !=, NULL); - if (zfs_read_history == 0 && ssh->size == 0) + if (zfs_read_history == 0 && shl->size == 0) return; if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED)) @@ -222,19 +174,14 @@ spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags) srh->aflags = aflags; srh->pid = getpid(); - mutex_enter(&ssh->lock); + mutex_enter(&shl->procfs_list.pl_lock); - srh->uid = ssh->count++; - list_insert_head(&ssh->list, srh); - ssh->size++; + procfs_list_add(&shl->procfs_list, srh); + shl->size++; - while (ssh->size > zfs_read_history) { - ssh->size--; - rm = list_remove_tail(&ssh->list); - kmem_free(rm, sizeof (spa_read_history_t)); - } + spa_read_history_truncate(shl, zfs_read_history); - mutex_exit(&ssh->lock); + mutex_exit(&shl->procfs_list.pl_lock); } /* @@ -256,22 +203,21 @@ typedef struct spa_txg_history { uint64_t writes; /* number of write operations */ uint64_t ndirty; /* number of dirty bytes */ hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */ - list_node_t sth_link; + procfs_list_node_t sth_node; } spa_txg_history_t; static int -spa_txg_history_headers(char *buf, size_t size) +spa_txg_history_show_header(struct seq_file *f) { - (void) snprintf(buf, size, "%-8s %-16s %-5s %-12s %-12s %-12s " + seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s " "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state", "ndirty", "nread", "nwritten", "reads", "writes", "otime", "qtime", "wtime", "stime"); - return (0); } static int -spa_txg_history_data(char *buf, size_t size, void *data) +spa_txg_history_show(struct seq_file *f, void *data) { spa_txg_history_t *sth = (spa_txg_history_t *)data; uint64_t open = 0, quiesce = 0, wait = 0, sync = 0; @@ -303,7 +249,7 @@ spa_txg_history_data(char *buf, size_t size, void *data) sync = sth->times[TXG_STATE_SYNCED] - sth->times[TXG_STATE_WAIT_FOR_SYNC]; - (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu " + seq_printf(f, "%-8llu %-16llu %-5c 
%-12llu " "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n", (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state, (u_longlong_t)sth->ndirty, @@ -315,110 +261,62 @@ spa_txg_history_data(char *buf, size_t size, void *data) return (0); } -/* - * Calculate the address for the next spa_stats_history_t entry. The - * ssh->lock will be held until ksp->ks_ndata entries are processed. - */ -static void * -spa_txg_history_addr(kstat_t *ksp, loff_t n) +/* Remove oldest elements from list until there are no more than 'size' left */ +static void +spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size) { - spa_t *spa = ksp->ks_private; - spa_stats_history_t *ssh = &spa->spa_stats.txg_history; - - ASSERT(MUTEX_HELD(&ssh->lock)); - - if (n == 0) - ssh->private = list_tail(&ssh->list); - else if (ssh->private) - ssh->private = list_prev(&ssh->list, ssh->private); - - return (ssh->private); -} - -/* - * When the kstat is written discard all spa_txg_history_t entries. The - * ssh->lock will be held until ksp->ks_ndata entries are processed. 
- */ -static int -spa_txg_history_update(kstat_t *ksp, int rw) -{ - spa_t *spa = ksp->ks_private; - spa_stats_history_t *ssh = &spa->spa_stats.txg_history; - - ASSERT(MUTEX_HELD(&ssh->lock)); - - if (rw == KSTAT_WRITE) { - spa_txg_history_t *sth; - - while ((sth = list_remove_head(&ssh->list))) { - ssh->size--; - kmem_free(sth, sizeof (spa_txg_history_t)); - } - - ASSERT3U(ssh->size, ==, 0); + spa_txg_history_t *sth; + while (shl->size > size) { + sth = list_remove_head(&shl->procfs_list.pl_list); + ASSERT3P(sth, !=, NULL); + kmem_free(sth, sizeof (spa_txg_history_t)); + shl->size--; } - ksp->ks_ndata = ssh->size; - ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t); + if (size == 0) + ASSERT(list_is_empty(&shl->procfs_list.pl_list)); +} + +static int +spa_txg_history_clear(procfs_list_t *procfs_list) +{ + spa_history_list_t *shl = procfs_list->pl_private; + mutex_enter(&procfs_list->pl_lock); + spa_txg_history_truncate(shl, 0); + mutex_exit(&procfs_list->pl_lock); return (0); } static void spa_txg_history_init(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.txg_history; - char *name; - kstat_t *ksp; + spa_history_list_t *shl = &spa->spa_stats.txg_history; + char *module; - mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&ssh->list, sizeof (spa_txg_history_t), - offsetof(spa_txg_history_t, sth_link)); + shl->size = 0; - ssh->count = 0; - ssh->size = 0; - ssh->private = NULL; + module = kmem_asprintf("zfs/%s", spa_name(spa)); - name = kmem_asprintf("zfs/%s", spa_name(spa)); + shl->procfs_list.pl_private = shl; + procfs_list_install(module, + "txgs", + &shl->procfs_list, + spa_txg_history_show, + spa_txg_history_show_header, + spa_txg_history_clear, + offsetof(spa_txg_history_t, sth_node)); - ksp = kstat_create(name, 0, "txgs", "misc", - KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); - ssh->kstat = ksp; - - if (ksp) { - ksp->ks_lock = &ssh->lock; - ksp->ks_data = NULL; - ksp->ks_private = spa; - ksp->ks_update = spa_txg_history_update; - 
kstat_set_raw_ops(ksp, spa_txg_history_headers, - spa_txg_history_data, spa_txg_history_addr); - kstat_install(ksp); - } - strfree(name); + strfree(module); } static void spa_txg_history_destroy(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.txg_history; - spa_txg_history_t *sth; - kstat_t *ksp; - - ksp = ssh->kstat; - if (ksp) - kstat_delete(ksp); - - mutex_enter(&ssh->lock); - while ((sth = list_remove_head(&ssh->list))) { - ssh->size--; - kmem_free(sth, sizeof (spa_txg_history_t)); - } - - ASSERT3U(ssh->size, ==, 0); - list_destroy(&ssh->list); - mutex_exit(&ssh->lock); - - mutex_destroy(&ssh->lock); + spa_history_list_t *shl = &spa->spa_stats.txg_history; + procfs_list_uninstall(&shl->procfs_list); + spa_txg_history_truncate(shl, 0); + procfs_list_destroy(&shl->procfs_list); } /* @@ -427,10 +325,10 @@ spa_txg_history_destroy(spa_t *spa) void spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) { - spa_stats_history_t *ssh = &spa->spa_stats.txg_history; - spa_txg_history_t *sth, *rm; + spa_history_list_t *shl = &spa->spa_stats.txg_history; + spa_txg_history_t *sth; - if (zfs_txg_history == 0 && ssh->size == 0) + if (zfs_txg_history == 0 && shl->size == 0) return; sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP); @@ -438,18 +336,11 @@ spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) sth->state = TXG_STATE_OPEN; sth->times[TXG_STATE_BIRTH] = birth_time; - mutex_enter(&ssh->lock); - - list_insert_head(&ssh->list, sth); - ssh->size++; - - while (ssh->size > zfs_txg_history) { - ssh->size--; - rm = list_remove_tail(&ssh->list); - kmem_free(rm, sizeof (spa_txg_history_t)); - } - - mutex_exit(&ssh->lock); + mutex_enter(&shl->procfs_list.pl_lock); + procfs_list_add(&shl->procfs_list, sth); + shl->size++; + spa_txg_history_truncate(shl, zfs_txg_history); + mutex_exit(&shl->procfs_list.pl_lock); } /* @@ -459,16 +350,16 @@ int spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state, hrtime_t 
completed_time) { - spa_stats_history_t *ssh = &spa->spa_stats.txg_history; + spa_history_list_t *shl = &spa->spa_stats.txg_history; spa_txg_history_t *sth; int error = ENOENT; if (zfs_txg_history == 0) return (0); - mutex_enter(&ssh->lock); - for (sth = list_head(&ssh->list); sth != NULL; - sth = list_next(&ssh->list, sth)) { + mutex_enter(&shl->procfs_list.pl_lock); + for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL; + sth = list_prev(&shl->procfs_list.pl_list, sth)) { if (sth->txg == txg) { sth->times[completed_state] = completed_time; sth->state++; @@ -476,7 +367,7 @@ spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state, break; } } - mutex_exit(&ssh->lock); + mutex_exit(&shl->procfs_list.pl_lock); return (error); } @@ -488,16 +379,16 @@ static int spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread, uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty) { - spa_stats_history_t *ssh = &spa->spa_stats.txg_history; + spa_history_list_t *shl = &spa->spa_stats.txg_history; spa_txg_history_t *sth; int error = ENOENT; if (zfs_txg_history == 0) return (0); - mutex_enter(&ssh->lock); - for (sth = list_head(&ssh->list); sth != NULL; - sth = list_next(&ssh->list, sth)) { + mutex_enter(&shl->procfs_list.pl_lock); + for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL; + sth = list_prev(&shl->procfs_list.pl_list, sth)) { if (sth->txg == txg) { sth->nread = nread; sth->nwritten = nwritten; @@ -508,7 +399,7 @@ spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread, break; } } - mutex_exit(&ssh->lock); + mutex_exit(&shl->procfs_list.pl_lock); return (error); } @@ -580,16 +471,16 @@ static int spa_tx_assign_update(kstat_t *ksp, int rw) { spa_t *spa = ksp->ks_private; - spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; + spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; int i; if (rw == KSTAT_WRITE) { - for (i = 0; i < ssh->count; i++) - ((kstat_named_t 
*)ssh->private)[i].value.ui64 = 0; + for (i = 0; i < shk->count; i++) + ((kstat_named_t *)shk->private)[i].value.ui64 = 0; } - for (i = ssh->count; i > 0; i--) - if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0) + for (i = shk->count; i > 0; i--) + if (((kstat_named_t *)shk->private)[i-1].value.ui64 != 0) break; ksp->ks_ndata = i; @@ -601,22 +492,22 @@ spa_tx_assign_update(kstat_t *ksp, int rw) static void spa_tx_assign_init(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; + spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; char *name; kstat_named_t *ks; kstat_t *ksp; int i; - mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); - ssh->count = 42; /* power of two buckets for 1ns to 2,199s */ - ssh->size = ssh->count * sizeof (kstat_named_t); - ssh->private = kmem_alloc(ssh->size, KM_SLEEP); + shk->count = 42; /* power of two buckets for 1ns to 2,199s */ + shk->size = shk->count * sizeof (kstat_named_t); + shk->private = kmem_alloc(shk->size, KM_SLEEP); name = kmem_asprintf("zfs/%s", spa_name(spa)); - for (i = 0; i < ssh->count; i++) { - ks = &((kstat_named_t *)ssh->private)[i]; + for (i = 0; i < shk->count; i++) { + ks = &((kstat_named_t *)shk->private)[i]; ks->data_type = KSTAT_DATA_UINT64; ks->value.ui64 = 0; (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns", @@ -625,13 +516,13 @@ spa_tx_assign_init(spa_t *spa) ksp = kstat_create(name, 0, "dmu_tx_assign", "misc", KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL); - ssh->kstat = ksp; + shk->kstat = ksp; if (ksp) { - ksp->ks_lock = &ssh->lock; - ksp->ks_data = ssh->private; - ksp->ks_ndata = ssh->count; - ksp->ks_data_size = ssh->size; + ksp->ks_lock = &shk->lock; + ksp->ks_data = shk->private; + ksp->ks_ndata = shk->count; + ksp->ks_data_size = shk->size; ksp->ks_private = spa; ksp->ks_update = spa_tx_assign_update; kstat_install(ksp); @@ -642,27 +533,27 @@ spa_tx_assign_init(spa_t *spa) static void 
spa_tx_assign_destroy(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; + spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; kstat_t *ksp; - ksp = ssh->kstat; + ksp = shk->kstat; if (ksp) kstat_delete(ksp); - kmem_free(ssh->private, ssh->size); - mutex_destroy(&ssh->lock); + kmem_free(shk->private, shk->size); + mutex_destroy(&shk->lock); } void spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs) { - spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram; + spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; uint64_t idx = 0; - while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1)) + while (((1ULL << idx) < nsecs) && (idx < shk->size - 1)) idx++; - atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64); + atomic_inc_64(&((kstat_named_t *)shk->private)[idx].value.ui64); } /* @@ -682,19 +573,19 @@ spa_io_history_update(kstat_t *ksp, int rw) static void spa_io_history_init(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.io_history; + spa_history_kstat_t *shk = &spa->spa_stats.io_history; char *name; kstat_t *ksp; - mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); name = kmem_asprintf("zfs/%s", spa_name(spa)); ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0); - ssh->kstat = ksp; + shk->kstat = ksp; if (ksp) { - ksp->ks_lock = &ssh->lock; + ksp->ks_lock = &shk->lock; ksp->ks_private = spa; ksp->ks_update = spa_io_history_update; kstat_install(ksp); @@ -705,12 +596,12 @@ spa_io_history_init(spa_t *spa) static void spa_io_history_destroy(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.io_history; + spa_history_kstat_t *shk = &spa->spa_stats.io_history; - if (ssh->kstat) - kstat_delete(ssh->kstat); + if (shk->kstat) + kstat_delete(shk->kstat); - mutex_destroy(&ssh->lock); + mutex_destroy(&shk->lock); } /* @@ -733,7 +624,7 @@ spa_io_history_destroy(spa_t *spa) */ typedef struct spa_mmp_history { - uint64_t 
mmp_kstat_id; /* unique # for updates */ + uint64_t mmp_node_id; /* unique # for updates */ uint64_t txg; /* txg of last sync */ uint64_t timestamp; /* UTC time MMP write issued */ uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */ @@ -743,20 +634,20 @@ typedef struct spa_mmp_history { int io_error; /* error status of MMP write */ hrtime_t error_start; /* hrtime of start of error period */ hrtime_t duration; /* time from submission to completion */ - list_node_t smh_link; + procfs_list_node_t smh_node; } spa_mmp_history_t; static int -spa_mmp_history_headers(char *buf, size_t size) +spa_mmp_history_show_header(struct seq_file *f) { - (void) snprintf(buf, size, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s " + seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s " "%-10s %s\n", "id", "txg", "timestamp", "error", "duration", "mmp_delay", "vdev_guid", "vdev_label", "vdev_path"); return (0); } static int -spa_mmp_history_data(char *buf, size_t size, void *data) +spa_mmp_history_show(struct seq_file *f, void *data) { spa_mmp_history_t *smh = (spa_mmp_history_t *)data; char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu " @@ -764,8 +655,8 @@ spa_mmp_history_data(char *buf, size_t size, void *data) char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu " "%-10lld %s\n"; - (void) snprintf(buf, size, (smh->error_start ? skip_fmt : write_fmt), - (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg, + seq_printf(f, (smh->error_start ? skip_fmt : write_fmt), + (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg, (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error, (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay, (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label, @@ -774,137 +665,86 @@ spa_mmp_history_data(char *buf, size_t size, void *data) return (0); } -/* - * Calculate the address for the next spa_stats_history_t entry. The - * ssh->lock will be held until ksp->ks_ndata entries are processed. 
- */ -static void * -spa_mmp_history_addr(kstat_t *ksp, loff_t n) +/* Remove oldest elements from list until there are no more than 'size' left */ +static void +spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size) { - spa_t *spa = ksp->ks_private; - spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; - - ASSERT(MUTEX_HELD(&ssh->lock)); - - if (n == 0) - ssh->private = list_tail(&ssh->list); - else if (ssh->private) - ssh->private = list_prev(&ssh->list, ssh->private); - - return (ssh->private); -} - -/* - * When the kstat is written discard all spa_mmp_history_t entries. The - * ssh->lock will be held until ksp->ks_ndata entries are processed. - */ -static int -spa_mmp_history_update(kstat_t *ksp, int rw) -{ - spa_t *spa = ksp->ks_private; - spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; - - ASSERT(MUTEX_HELD(&ssh->lock)); - - if (rw == KSTAT_WRITE) { - spa_mmp_history_t *smh; - - while ((smh = list_remove_head(&ssh->list))) { - ssh->size--; - if (smh->vdev_path) - strfree(smh->vdev_path); - kmem_free(smh, sizeof (spa_mmp_history_t)); - } - - ASSERT3U(ssh->size, ==, 0); + spa_mmp_history_t *smh; + while (shl->size > size) { + smh = list_remove_head(&shl->procfs_list.pl_list); + if (smh->vdev_path) + strfree(smh->vdev_path); + kmem_free(smh, sizeof (spa_mmp_history_t)); + shl->size--; } - ksp->ks_ndata = ssh->size; - ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t); + if (size == 0) + ASSERT(list_is_empty(&shl->procfs_list.pl_list)); +} + +static int +spa_mmp_history_clear(procfs_list_t *procfs_list) +{ + spa_history_list_t *shl = procfs_list->pl_private; + mutex_enter(&procfs_list->pl_lock); + spa_mmp_history_truncate(shl, 0); + mutex_exit(&procfs_list->pl_lock); return (0); } static void spa_mmp_history_init(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; - char *name; - kstat_t *ksp; + spa_history_list_t *shl = &spa->spa_stats.mmp_history; + char *module; - mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, 
NULL); - list_create(&ssh->list, sizeof (spa_mmp_history_t), - offsetof(spa_mmp_history_t, smh_link)); + shl->size = 0; - ssh->count = 0; - ssh->size = 0; - ssh->private = NULL; + module = kmem_asprintf("zfs/%s", spa_name(spa)); - name = kmem_asprintf("zfs/%s", spa_name(spa)); + shl->procfs_list.pl_private = shl; + procfs_list_install(module, + "multihost", + &shl->procfs_list, + spa_mmp_history_show, + spa_mmp_history_show_header, + spa_mmp_history_clear, + offsetof(spa_mmp_history_t, smh_node)); - ksp = kstat_create(name, 0, "multihost", "misc", - KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); - ssh->kstat = ksp; - - if (ksp) { - ksp->ks_lock = &ssh->lock; - ksp->ks_data = NULL; - ksp->ks_private = spa; - ksp->ks_update = spa_mmp_history_update; - kstat_set_raw_ops(ksp, spa_mmp_history_headers, - spa_mmp_history_data, spa_mmp_history_addr); - kstat_install(ksp); - } - strfree(name); + strfree(module); } static void spa_mmp_history_destroy(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; - spa_mmp_history_t *smh; - kstat_t *ksp; - - ksp = ssh->kstat; - if (ksp) - kstat_delete(ksp); - - mutex_enter(&ssh->lock); - while ((smh = list_remove_head(&ssh->list))) { - ssh->size--; - if (smh->vdev_path) - strfree(smh->vdev_path); - kmem_free(smh, sizeof (spa_mmp_history_t)); - } - - ASSERT3U(ssh->size, ==, 0); - list_destroy(&ssh->list); - mutex_exit(&ssh->lock); - - mutex_destroy(&ssh->lock); + spa_history_list_t *shl = &spa->spa_stats.mmp_history; + procfs_list_uninstall(&shl->procfs_list); + spa_mmp_history_truncate(shl, 0); + procfs_list_destroy(&shl->procfs_list); } /* * Set duration in existing "skip" record to how long we have waited for a leaf * vdev to become available. * - * Important that we start search at the head of the list where new + * Important that we start search at the tail of the list where new * records are inserted, so this is normally an O(1) operation. 
*/ int -spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id) +spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id) { - spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; + spa_history_list_t *shl = &spa->spa_stats.mmp_history; spa_mmp_history_t *smh; int error = ENOENT; - if (zfs_multihost_history == 0 && ssh->size == 0) + if (zfs_multihost_history == 0 && shl->size == 0) return (0); - mutex_enter(&ssh->lock); - for (smh = list_head(&ssh->list); smh != NULL; - smh = list_next(&ssh->list, smh)) { - if (smh->mmp_kstat_id == mmp_kstat_id) { + mutex_enter(&shl->procfs_list.pl_lock); + for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL; + smh = list_prev(&shl->procfs_list.pl_list, smh)) { + if (smh->mmp_node_id == mmp_node_id) { ASSERT3U(smh->io_error, !=, 0); smh->duration = gethrtime() - smh->error_start; smh->vdev_guid++; @@ -912,7 +752,7 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id) break; } } - mutex_exit(&ssh->lock); + mutex_exit(&shl->procfs_list.pl_lock); return (error); } @@ -922,20 +762,20 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id) * See comment re: search order above spa_mmp_history_set_skip(). 
*/ int -spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error, +spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error, hrtime_t duration) { - spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; + spa_history_list_t *shl = &spa->spa_stats.mmp_history; spa_mmp_history_t *smh; int error = ENOENT; - if (zfs_multihost_history == 0 && ssh->size == 0) + if (zfs_multihost_history == 0 && shl->size == 0) return (0); - mutex_enter(&ssh->lock); - for (smh = list_head(&ssh->list); smh != NULL; - smh = list_next(&ssh->list, smh)) { - if (smh->mmp_kstat_id == mmp_kstat_id) { + mutex_enter(&shl->procfs_list.pl_lock); + for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL; + smh = list_prev(&shl->procfs_list.pl_list, smh)) { + if (smh->mmp_node_id == mmp_node_id) { ASSERT(smh->io_error == 0); smh->io_error = io_error; smh->duration = duration; @@ -943,7 +783,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error, break; } } - mutex_exit(&ssh->lock); + mutex_exit(&shl->procfs_list.pl_lock); return (error); } @@ -953,16 +793,16 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error, * error == 0 : a write was issued. * error != 0 : a write was not issued because no leaves were found. 
*/ -void * +void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, - uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id, + uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id, int error) { - spa_stats_history_t *ssh = &spa->spa_stats.mmp_history; - spa_mmp_history_t *smh, *rm; + spa_history_list_t *shl = &spa->spa_stats.mmp_history; + spa_mmp_history_t *smh; - if (zfs_multihost_history == 0 && ssh->size == 0) - return (NULL); + if (zfs_multihost_history == 0 && shl->size == 0) + return; smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP); smh->txg = txg; @@ -974,7 +814,7 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, smh->vdev_path = strdup(vd->vdev_path); } smh->vdev_label = label; - smh->mmp_kstat_id = mmp_kstat_id; + smh->mmp_node_id = mmp_node_id; if (error) { smh->io_error = error; @@ -982,21 +822,11 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, smh->vdev_guid = 1; } - mutex_enter(&ssh->lock); - - list_insert_head(&ssh->list, smh); - ssh->size++; - - while (ssh->size > zfs_multihost_history) { - ssh->size--; - rm = list_remove_tail(&ssh->list); - if (rm->vdev_path) - strfree(rm->vdev_path); - kmem_free(rm, sizeof (spa_mmp_history_t)); - } - - mutex_exit(&ssh->lock); - return ((void *)smh); + mutex_enter(&shl->procfs_list.pl_lock); + procfs_list_add(&shl->procfs_list, smh); + shl->size++; + spa_mmp_history_truncate(shl, zfs_multihost_history); + mutex_exit(&shl->procfs_list.pl_lock); } static void * @@ -1023,19 +853,19 @@ spa_state_data(char *buf, size_t size, void *data) static void spa_state_init(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.state; + spa_history_kstat_t *shk = &spa->spa_stats.state; char *name; kstat_t *ksp; - mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); name = kmem_asprintf("zfs/%s", spa_name(spa)); ksp = kstat_create(name, 0, "state", "misc", KSTAT_TYPE_RAW, 0, 
KSTAT_FLAG_VIRTUAL); - ssh->kstat = ksp; + shk->kstat = ksp; if (ksp) { - ksp->ks_lock = &ssh->lock; + ksp->ks_lock = &shk->lock; ksp->ks_data = NULL; ksp->ks_private = spa; ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS; @@ -1049,12 +879,12 @@ spa_state_init(spa_t *spa) static void spa_health_destroy(spa_t *spa) { - spa_stats_history_t *ssh = &spa->spa_stats.state; - kstat_t *ksp = ssh->kstat; + spa_history_kstat_t *shk = &spa->spa_stats.state; + kstat_t *ksp = shk->kstat; if (ksp) kstat_delete(ksp); - mutex_destroy(&ssh->lock); + mutex_destroy(&shk->lock); } void diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 30a883f85..89cdf7d81 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -429,16 +429,16 @@ static void vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; - spa_stats_history_t *ssh = &spa->spa_stats.io_history; + spa_history_kstat_t *shk = &spa->spa_stats.io_history; ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); avl_add(vdev_queue_type_tree(vq, zio->io_type), zio); - if (ssh->kstat != NULL) { - mutex_enter(&ssh->lock); - kstat_waitq_enter(ssh->kstat->ks_data); - mutex_exit(&ssh->lock); + if (shk->kstat != NULL) { + mutex_enter(&shk->lock); + kstat_waitq_enter(shk->kstat->ks_data); + mutex_exit(&shk->lock); } } @@ -446,16 +446,16 @@ static void vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; - spa_stats_history_t *ssh = &spa->spa_stats.io_history; + spa_history_kstat_t *shk = &spa->spa_stats.io_history; ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio); - if (ssh->kstat != NULL) { - mutex_enter(&ssh->lock); - kstat_waitq_exit(ssh->kstat->ks_data); - mutex_exit(&ssh->lock); + if (shk->kstat != NULL) { + mutex_enter(&shk->lock); + kstat_waitq_exit(shk->kstat->ks_data); + 
mutex_exit(&shk->lock); } } @@ -463,17 +463,17 @@ static void vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; - spa_stats_history_t *ssh = &spa->spa_stats.io_history; + spa_history_kstat_t *shk = &spa->spa_stats.io_history; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active++; avl_add(&vq->vq_active_tree, zio); - if (ssh->kstat != NULL) { - mutex_enter(&ssh->lock); - kstat_runq_enter(ssh->kstat->ks_data); - mutex_exit(&ssh->lock); + if (shk->kstat != NULL) { + mutex_enter(&shk->lock); + kstat_runq_enter(shk->kstat->ks_data); + mutex_exit(&shk->lock); } } @@ -481,17 +481,17 @@ static void vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; - spa_stats_history_t *ssh = &spa->spa_stats.io_history; + spa_history_kstat_t *shk = &spa->spa_stats.io_history; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active--; avl_remove(&vq->vq_active_tree, zio); - if (ssh->kstat != NULL) { - kstat_io_t *ksio = ssh->kstat->ks_data; + if (shk->kstat != NULL) { + kstat_io_t *ksio = shk->kstat->ks_data; - mutex_enter(&ssh->lock); + mutex_enter(&shk->lock); kstat_runq_exit(ksio); if (zio->io_type == ZIO_TYPE_READ) { ksio->reads++; @@ -500,7 +500,7 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) ksio->writes++; ksio->nwritten += zio->io_size; } - mutex_exit(&ssh->lock); + mutex_exit(&shk->lock); } } diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c index ca79893c9..b5f93fd9b 100644 --- a/module/zfs/zfs_debug.c +++ b/module/zfs/zfs_debug.c @@ -24,13 +24,17 @@ */ #include -#include -list_t zfs_dbgmsgs; +typedef struct zfs_dbgmsg { + procfs_list_node_t zdm_node; + time_t zdm_timestamp; + int zdm_size; + char zdm_msg[1]; /* variable length allocation */ +} zfs_dbgmsg_t; + +procfs_list_t zfs_dbgmsgs; int zfs_dbgmsg_size = 0; -kmutex_t 
zfs_dbgmsgs_lock; int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */ -kstat_t *zfs_dbgmsg_kstat; /* * Internal ZFS debug messages are enabled by default. @@ -47,122 +51,70 @@ kstat_t *zfs_dbgmsg_kstat; int zfs_dbgmsg_enable = 1; static int -zfs_dbgmsg_headers(char *buf, size_t size) +zfs_dbgmsg_show_header(struct seq_file *f) { - (void) snprintf(buf, size, "%-12s %-8s\n", "timestamp", "message"); - + seq_printf(f, "%-12s %-8s\n", "timestamp", "message"); return (0); } static int -zfs_dbgmsg_data(char *buf, size_t size, void *data) +zfs_dbgmsg_show(struct seq_file *f, void *p) { - zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)data; - - (void) snprintf(buf, size, "%-12llu %-s\n", + zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)p; + seq_printf(f, "%-12llu %-s\n", (u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg); - return (0); } -static void * -zfs_dbgmsg_addr(kstat_t *ksp, loff_t n) -{ - zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)ksp->ks_private; - - ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock)); - - if (n == 0) - ksp->ks_private = list_head(&zfs_dbgmsgs); - else if (zdm) - ksp->ks_private = list_next(&zfs_dbgmsgs, zdm); - - return (ksp->ks_private); -} - static void zfs_dbgmsg_purge(int max_size) { - zfs_dbgmsg_t *zdm; - int size; - - ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock)); - while (zfs_dbgmsg_size > max_size) { - zdm = list_remove_head(&zfs_dbgmsgs); + zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list); if (zdm == NULL) return; - size = zdm->zdm_size; + int size = zdm->zdm_size; kmem_free(zdm, size); zfs_dbgmsg_size -= size; } } static int -zfs_dbgmsg_update(kstat_t *ksp, int rw) +zfs_dbgmsg_clear(procfs_list_t *procfs_list) { - if (rw == KSTAT_WRITE) - zfs_dbgmsg_purge(0); - + mutex_enter(&zfs_dbgmsgs.pl_lock); + zfs_dbgmsg_purge(0); + mutex_exit(&zfs_dbgmsgs.pl_lock); return (0); } void zfs_dbgmsg_init(void) { - list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t), + procfs_list_install("zfs", + "dbgmsg", + &zfs_dbgmsgs, + zfs_dbgmsg_show, + zfs_dbgmsg_show_header, + zfs_dbgmsg_clear, 
offsetof(zfs_dbgmsg_t, zdm_node)); - mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL); - - zfs_dbgmsg_kstat = kstat_create("zfs", 0, "dbgmsg", "misc", - KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); - if (zfs_dbgmsg_kstat) { - zfs_dbgmsg_kstat->ks_lock = &zfs_dbgmsgs_lock; - zfs_dbgmsg_kstat->ks_ndata = UINT32_MAX; - zfs_dbgmsg_kstat->ks_private = NULL; - zfs_dbgmsg_kstat->ks_update = zfs_dbgmsg_update; - kstat_set_raw_ops(zfs_dbgmsg_kstat, zfs_dbgmsg_headers, - zfs_dbgmsg_data, zfs_dbgmsg_addr); - kstat_install(zfs_dbgmsg_kstat); - } } void zfs_dbgmsg_fini(void) { - if (zfs_dbgmsg_kstat) - kstat_delete(zfs_dbgmsg_kstat); + procfs_list_uninstall(&zfs_dbgmsgs); + zfs_dbgmsg_purge(0); + /* * TODO - decide how to make this permanent */ #ifdef _KERNEL - mutex_enter(&zfs_dbgmsgs_lock); - zfs_dbgmsg_purge(0); - mutex_exit(&zfs_dbgmsgs_lock); - mutex_destroy(&zfs_dbgmsgs_lock); + procfs_list_destroy(&zfs_dbgmsgs); #endif } -void -__zfs_dbgmsg(char *buf) -{ - zfs_dbgmsg_t *zdm; - int size; - - size = sizeof (zfs_dbgmsg_t) + strlen(buf); - zdm = kmem_zalloc(size, KM_SLEEP); - zdm->zdm_size = size; - zdm->zdm_timestamp = gethrestime_sec(); - strcpy(zdm->zdm_msg, buf); - - mutex_enter(&zfs_dbgmsgs_lock); - list_insert_tail(&zfs_dbgmsgs, zdm); - zfs_dbgmsg_size += size; - zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0)); - mutex_exit(&zfs_dbgmsgs_lock); -} - void __set_error(const char *file, const char *func, int line, int err) { @@ -176,6 +128,22 @@ __set_error(const char *file, const char *func, int line, int err) } #ifdef _KERNEL +static void +__zfs_dbgmsg(char *buf) +{ + int size = sizeof (zfs_dbgmsg_t) + strlen(buf); + zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP); + zdm->zdm_size = size; + zdm->zdm_timestamp = gethrestime_sec(); + strcpy(zdm->zdm_msg, buf); + + mutex_enter(&zfs_dbgmsgs.pl_lock); + procfs_list_add(&zfs_dbgmsgs, zdm); + zfs_dbgmsg_size += size; + zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0)); + mutex_exit(&zfs_dbgmsgs.pl_lock); +} + void __dprintf(const 
char *file, const char *func, int line, const char *fmt, ...) { @@ -244,14 +212,12 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) void zfs_dbgmsg_print(const char *tag) { - zfs_dbgmsg_t *zdm; - (void) printf("ZFS_DBGMSG(%s):\n", tag); - mutex_enter(&zfs_dbgmsgs_lock); - for (zdm = list_head(&zfs_dbgmsgs); zdm; - zdm = list_next(&zfs_dbgmsgs, zdm)) + mutex_enter(&zfs_dbgmsgs.pl_lock); + for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL; + zdm = list_next(&zfs_dbgmsgs.pl_list, zdm)) (void) printf("%s\n", zdm->zdm_msg); - mutex_exit(&zfs_dbgmsgs_lock); + mutex_exit(&zfs_dbgmsgs.pl_lock); } #endif /* _KERNEL */ diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 4b41c3f74..95e70f043 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -584,10 +584,6 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos', post = tags = ['functional', 'inuse'] -[tests/functional/kstat] -tests = ['state'] -tags = ['functional', 'kstat'] - [tests/functional/large_files] tests = ['large_files_001_pos', 'large_files_002_pos'] tags = ['functional', 'large_files'] @@ -672,6 +668,11 @@ tags = ['functional', 'poolversion'] tests = ['privilege_001_pos', 'privilege_002_pos'] tags = ['functional', 'privilege'] +[tests/functional/procfs] +tests = ['procfs_list_basic', 'procfs_list_concurrent_readers', + 'procfs_list_stale_read', 'pool_state'] +tags = ['functional', 'procfs'] + [tests/functional/projectquota] tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos', 'projectquota_001_pos', 'projectquota_002_pos', 'projectquota_003_pos', diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index e0a4aca99..961a34027 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -29,7 +29,6 @@ SUBDIRS = \ inheritance \ inuse \ io \ - kstat \ large_files \ largest_pool \ libzfs \ @@ -48,6 
+47,7 @@ SUBDIRS = \ pool_names \ poolversion \ privilege \ + procfs \ projectquota \ quota \ raidz \ diff --git a/tests/zfs-tests/tests/functional/kstat/Makefile.am b/tests/zfs-tests/tests/functional/kstat/Makefile.am deleted file mode 100644 index 8ad83ec3e..000000000 --- a/tests/zfs-tests/tests/functional/kstat/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/kstat -dist_pkgdata_SCRIPTS = \ - setup.ksh \ - cleanup.ksh \ - state.ksh diff --git a/tests/zfs-tests/tests/functional/procfs/Makefile.am b/tests/zfs-tests/tests/functional/procfs/Makefile.am new file mode 100644 index 000000000..a7f022d9f --- /dev/null +++ b/tests/zfs-tests/tests/functional/procfs/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/procfs +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + procfs_list_basic.ksh \ + procfs_list_concurrent_readers.ksh \ + procfs_list_stale_read.ksh \ + pool_state.ksh diff --git a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh b/tests/zfs-tests/tests/functional/procfs/cleanup.ksh similarity index 92% rename from tests/zfs-tests/tests/functional/kstat/cleanup.ksh rename to tests/zfs-tests/tests/functional/procfs/cleanup.ksh index 8a212ce37..8fe46577e 100755 --- a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/procfs/cleanup.ksh @@ -19,8 +19,9 @@ # # CDDL HEADER END # + # -# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2018 by Delphix. All rights reserved. # . 
$STF_SUITE/include/libtest.shlib diff --git a/tests/zfs-tests/tests/functional/kstat/state.ksh b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/kstat/state.ksh rename to tests/zfs-tests/tests/functional/procfs/pool_state.ksh diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh new file mode 100755 index 000000000..c9eff3649 --- /dev/null +++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh @@ -0,0 +1,95 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Test that we can read from and write to a file in procfs whose contents is +# backed by a linked list. +# +# STRATEGY: +# 1. Take some snapshots of a filesystem, which will cause some messages to be +# written to the zfs dbgmsgs. +# 2. Read the dbgmsgs via procfs and verify that the expected messages are +# present. +# 3. Write to the dbgmsgs file to clear the messages. +# 4. Read the dbgmsgs again, and make sure the messages are no longer present. 
+# + +function cleanup +{ + datasetexists $FS && log_must zfs destroy -r $FS +} + +function count_snap_cmds +{ + typeset expected_count=$1 + count=$(grep "command: zfs snapshot $FS@testsnapshot" | wc -l) + log_must eval "[[ $count -eq $expected_count ]]" +} + +typeset -r ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg +typeset -r FS=$TESTPOOL/fs +typeset snap_msgs + +log_onexit cleanup + +# Clear out old messages +echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG" + +log_must zfs create $FS +for i in {1..20}; do + log_must zfs snapshot "$FS@testsnapshot$i" +done +log_must zpool sync $TESTPOOL + +# +# Read the debug message file in small chunks to make sure that the read is +# split up into multiple syscalls. This tests that when a syscall begins we +# correctly pick up in the list of messages where the previous syscall left +# off. The size of the read can affect how many bytes the seq_file code has +# left in its internal buffer, which in turn can affect the relative pos that +# the seq_file code picks up at when the next read starts. Try a few +# different size reads to make sure we can handle each case. +# +# Check that the file has the right contents by grepping for some of the +# messages that we expect to be present. +# +for chunk_sz in {1,64,256,1024,4096}; do + dd if=$ZFS_DBGMSG bs=$chunk_sz | count_snap_cmds 20 +done + +# Clear out old messages and check that they really are gone +echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG" +cat $ZFS_DBGMSG | count_snap_cmds 0 +# +# Even though we don't expect any messages in the file, reading should still +# succeed. 
+# +log_must cat $ZFS_DBGMSG + +log_pass "Basic reading/writing of procfs file backed by linked list successful" diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh new file mode 100755 index 000000000..473de5c84 --- /dev/null +++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh @@ -0,0 +1,82 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Make sure that interleaving reads from different readers does not affect the +# results that are returned. +# +# STRATEGY: +# 1. Make sure a few debug messages have been logged. +# 2. Open the procfs file and start reading from it. +# 3. Open the file again, and read its entire contents. +# 4. Resume reading from the first instance. +# 5. Check that the contents read by the two instances are identical. 
+# + +function cleanup +{ + [[ -z $msgs1 ]] || log_must rm $msgs1 + [[ -z $msgs2 ]] || log_must rm $msgs2 + datasetexists $FS && log_must zfs destroy -r $FS +} + +typeset -r ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg +typeset -r FS=$TESTPOOL/fs +typeset msgs1 msgs2 + +log_onexit cleanup + +# Clear out old messages +echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG" + +# Add some new messages +log_must zfs create $FS +for i in {1..20}; do + log_must zfs snapshot "$FS@testsnapshot$i" +done +log_must zpool sync $TESTPOOL + +msgs1=$(mktemp) || log_fail +msgs2=$(mktemp) || log_fail + +# +# Start reading file, pause and read it from another process, and then finish +# reading. +# +{ dd bs=512 count=4; cat $ZFS_DBGMSG >$msgs1; cat; } <$ZFS_DBGMSG >$msgs2 + +# +# Truncate the result of the read that completed second in case it picked up an +# extra message that was logged after the first read completed. +# +log_must truncate -s $(stat -c "%s" $msgs1) $msgs2 + +log_must diff $msgs1 $msgs2 + +log_pass "Concurrent readers receive identical results" diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_stale_read.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_stale_read.ksh new file mode 100755 index 000000000..c363e7f8b --- /dev/null +++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_stale_read.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Make sure errors caused by messages being dropped from the list backing the +# procfs file are handled gracefully. +# +# STRATEGY: +# 1. Make sure a few entries have been logged. +# 2. Open the procfs file and start reading from it. +# 3. Write to the file to cause its contents to be dropped. +# 4. Resume reading from the first instance, and check that the expected +# error is received. +# 5. Repeat steps 1-4, except instead of dropping all the messages by writing +# to the file, cause enough new messages to be written that the old messages +# are dropped. +# + +function cleanup +{ + echo $default_max_entries >$MAX_ENTRIES_PARAM || log_fail +} + +function sync_n +{ + for i in {1..$1}; do + log_must zpool sync $TESTPOOL + done + return 0 +} + +function do_test +{ + typeset cmd=$1 + + # Clear out old entries + echo 0 >$TXG_HIST || log_fail + + # Add some new entries + sync_n 20 + + # Confirm that there actually is something in the file. + [[ $(wc -l <$TXG_HIST) -ge 20 ]] || log_fail "expected more entries" + + # + # Start reading file, pause and run a command that will cause the + # current offset into the file to become invalid, and then try to + # finish reading. 
+ # + { + log_must dd bs=512 count=4 >/dev/null + log_must eval "$cmd" + cat 2>&1 >/dev/null | log_must grep "Input/output error" + } <$TXG_HIST +} + +typeset -r TXG_HIST=/proc/spl/kstat/zfs/$TESTPOOL/txgs +typeset MAX_ENTRIES_PARAM=/sys/module/zfs/parameters/zfs_txg_history +typeset default_max_entries + +log_onexit cleanup + +default_max_entries=$(cat $MAX_ENTRIES_PARAM) || log_fail +echo 50 >$MAX_ENTRIES_PARAM || log_fail + +# Clear all of the existing entries. +do_test "echo 0 >$TXG_HIST" + +# Add enough new entries to the list that all of the old ones are dropped. +do_test "sync_n 60" + +log_pass "Attempting to read dropped message returns expected error" diff --git a/tests/zfs-tests/tests/functional/kstat/setup.ksh b/tests/zfs-tests/tests/functional/procfs/setup.ksh similarity index 86% rename from tests/zfs-tests/tests/functional/kstat/setup.ksh rename to tests/zfs-tests/tests/functional/procfs/setup.ksh index 57717a096..b3812dbdc 100755 --- a/tests/zfs-tests/tests/functional/kstat/setup.ksh +++ b/tests/zfs-tests/tests/functional/procfs/setup.ksh @@ -19,16 +19,16 @@ # # CDDL HEADER END # + # -# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2018 by Delphix. All rights reserved. # . $STF_SUITE/include/libtest.shlib if ! is_linux ; then - log_unsupported "/proc/spl/kstat//health only supported on Linux" + log_unsupported "procfs is only used on Linux" fi default_mirror_setup $DISKS - log_pass