mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-15 04:30:33 +03:00
6b64382b17
= Motivation While dealing with another performance issue (see 126118f) we noticed that we spend a lot of time in various places in the kernel when constructing long nvlists. The problem is that when an nvlist is created with the NV_UNIQUE_NAME set (which is the case most of the time), we do a linear search through the whole list to ensure uniqueness for every entry we add. An example of the above scenario can be seen in the following flamegraph, where more than have the time of the zfsdev_ioctl() is spent on constructing nvlists. Flamegraph: https://sdimitro.github.io/img/flame/sdimitro_snap_unmount3.svg Adding a table to speed up lookups will help situations where we just construct an nvlist (like the scenario above), in addition to regular lookups and removals. = What this patch does In this diff we've implemented a hash-table on top of the nvlist code that converts most nvlist operations from O(# number of entries) to O(1)* (the start is for amortized time as the hash-table grows and shrinks depending on the # of entries - plain lookup is strictly O(1)). = Performance Analysis To analyze the performance improvement I just used the setup from the snapshot deletion issue mentioned above in the Motivation section. Basically I created 10K filesystems with one snapshot each and then I just used the API of libZFS_Core to pass down an nvlist of all the snapshots to have them deleted. The reason I used my own driver program was to have clean performance results of what actually happens in the kernel. The flamegraphs and wall clock times mentioned below were gathered from the start to the end of the driver program's run. Between trials the testpool used was completely destroyed, the system was rebooted and the testpool was completely recreated. The reason for this dance was to get consistent results. == Results (before patch): === Sampling Flamegraphs [Trial 1] https://sdimitro.github.io/img/flame/DLPX-53417/trial-A.svg [Trial 2] https://sdimitro.github.io/img/flame/DLPX-53417/trial-A2.svg [Trial 3] https://sdimitro.github.io/img/flame/DLPX-53417/trial-A3.svg === Wall clock times (in seconds) ``` [Trial 4] real 5.3 user 0.4 sys 2.3 [Trial 5] real 8.2 user 0.4 sys 2.4 [Trial 6] real 6.0 user 0.5 sys 2.3 ``` == Results (after patch): === Sampling Flamegraphs [Trial 1] https://sdimitro.github.io/img/flame/DLPX-53417/trial-Ae.svg [Trial 2] https://sdimitro.github.io/img/flame/DLPX-53417/trial-A2e.svg [Trial 3] https://sdimitro.github.io/img/flame/DLPX-53417/trial-A3e.svg === Wall clock times (in seconds) ``` [Trial 4] real 4.9 user 0.0 sys 0.9 [Trial 5] real 3.8 user 0.0 sys 0.9 [Trial 6] real 3.6 user 0.0 sys 0.9 ``` == Analysis The results between the trials are consistent so in this sections I will only talk about the flamegraph results from trial-1 and the wall-clock results from trial-4. From trial-1 we can see that zfs_dev_ioctl() goes from 2,331 to 996 samples counts. Specifically, the samples from fnvlist_add_nvlist() and spa_history_log_nvl() are almost gone (~500 & ~800 to 5 & 5 samples), leaving zfs_ioc_destroy_snaps() to dominate most samples from zfs_dev_ioctl(). From trial-4 we see that the user time dropped to 0 secods. I believe the consistent 0.4 seconds before my patch was applied was due to my driver program constructing the long nvlist of snapshots so it can pass it to the kernel. As for the system time, the effect there is more clear (2.3 down to 0.9 seconds). Porting Notes: * DATA_TYPE_DONTCARE case added to switch in fm_nvprintr() and zpool_do_events_nvprint(). Authored by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com> Reviewed by: Matt Ahrens <matt@delphix.com> Reviewed by: Sebastien Roy <sebastien.roy@delphix.com> Approved by: Robert Mustacchi <rm@joyent.com> Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/9580 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/b5eca7b1 Closes #7748
91 lines
2.2 KiB
C
91 lines
2.2 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License, Version 1.0 only
|
|
* (the "License"). You may not use this file except in compliance
|
|
* with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
|
|
* Use is subject to license terms.
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 2017 by Delphix. All rights reserved.
|
|
*/
|
|
|
|
#ifndef _NVPAIR_IMPL_H
|
|
#define _NVPAIR_IMPL_H
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#include <sys/nvpair.h>
|
|
|
|
/*
|
|
* The structures here provided for information and debugging purposes only
|
|
* may be changed in the future.
|
|
*/
|
|
|
|
/*
|
|
* implementation linked list for pre-packed data
|
|
*/
|
|
typedef struct i_nvp i_nvp_t;
|
|
|
|
struct i_nvp {
|
|
union {
|
|
/* ensure alignment */
|
|
uint64_t _nvi_align;
|
|
|
|
struct {
|
|
/* pointer to next nvpair */
|
|
i_nvp_t *_nvi_next;
|
|
|
|
/* pointer to prev nvpair */
|
|
i_nvp_t *_nvi_prev;
|
|
|
|
/* next pair in table bucket */
|
|
i_nvp_t *_nvi_hashtable_next;
|
|
} _nvi;
|
|
} _nvi_un;
|
|
|
|
/* nvpair */
|
|
nvpair_t nvi_nvp;
|
|
};
|
|
#define nvi_next _nvi_un._nvi._nvi_next
|
|
#define nvi_prev _nvi_un._nvi._nvi_prev
|
|
#define nvi_hashtable_next _nvi_un._nvi._nvi_hashtable_next
|
|
|
|
typedef struct {
|
|
i_nvp_t *nvp_list; /* linked list of nvpairs */
|
|
i_nvp_t *nvp_last; /* last nvpair */
|
|
i_nvp_t *nvp_curr; /* current walker nvpair */
|
|
nv_alloc_t *nvp_nva; /* pluggable allocator */
|
|
uint32_t nvp_stat; /* internal state */
|
|
|
|
i_nvp_t **nvp_hashtable; /* table of entries used for lookup */
|
|
uint32_t nvp_nbuckets; /* # of buckets in hash table */
|
|
uint32_t nvp_nentries; /* # of entries in hash table */
|
|
} nvpriv_t;
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _NVPAIR_IMPL_H */
|