mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
OpenZFS restructuring - move platform specific sources
Move platform specific Linux source under module/os/linux/
and update the build system accordingly. Additional code
restructuring will follow to make the common code fully
portable.
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Igor Kozhukhov <igor@dilos.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Macy <mmacy@FreeBSD.org>
Closes #9206
This commit is contained in:
committed by
Brian Behlendorf
parent
870e7a52c1
commit
bced7e3aaa
@@ -0,0 +1,18 @@
|
||||
# Linux-specific SPL objects appended to $(MODULE)-objs; paths are relative
# to this build fragment.
$(MODULE)-objs += \
	../os/linux/spl/spl-atomic.o \
	../os/linux/spl/spl-condvar.o \
	../os/linux/spl/spl-cred.o \
	../os/linux/spl/spl-err.o \
	../os/linux/spl/spl-generic.o \
	../os/linux/spl/spl-kmem.o \
	../os/linux/spl/spl-kmem-cache.o \
	../os/linux/spl/spl-kobj.o \
	../os/linux/spl/spl-kstat.o \
	../os/linux/spl/spl-proc.o \
	../os/linux/spl/spl-procfs-list.o \
	../os/linux/spl/spl-taskq.o \
	../os/linux/spl/spl-thread.o \
	../os/linux/spl/spl-tsd.o \
	../os/linux/spl/spl-vmem.o \
	../os/linux/spl/spl-vnode.o \
	../os/linux/spl/spl-xdr.o \
	../os/linux/spl/spl-zlib.o
|
||||
@@ -0,0 +1,16 @@
|
||||
The Solaris Porting Layer, SPL, is a Linux kernel module which provides a
compatibility layer used by the [ZFS on Linux](http://zfsonlinux.org) project.
|
||||
|
||||
# Installation
|
||||
|
||||
The latest version of the SPL is maintained as part of this repository.
Only when building ZFS version 0.7.x or earlier must an external SPL release
be used. These releases can be found at:
|
||||
|
||||
* Version 0.7.x: https://github.com/zfsonlinux/spl/tree/spl-0.7-release
|
||||
* Version 0.6.5.x: https://github.com/zfsonlinux/spl/tree/spl-0.6.5-release
|
||||
|
||||
# Release
|
||||
|
||||
The SPL is released under a GPLv2 license.
For more details see the NOTICE and THIRDPARTYLICENSE files; `UCRL-CODE-235197`
|
||||
@@ -0,0 +1,339 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Lesser General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License.
|
||||
@@ -0,0 +1 @@
|
||||
COMPATIBILITY LAYER FOR OPENZFS ON LINUX
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Atomic Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/atomic.h>
|
||||
|
||||
#if defined(ATOMIC_SPINLOCK)
/*
 * Global spinlocks used when ATOMIC_SPINLOCK is defined.  Each lock is
 * exported right after its definition so other modules can reference it.
 * NOTE(review): presumably these back the spinlock-based 32-bit and 64-bit
 * atomic helpers declared in <sys/atomic.h> -- confirm against that header.
 */
DEFINE_SPINLOCK(atomic32_lock);
EXPORT_SYMBOL(atomic32_lock);

DEFINE_SPINLOCK(atomic64_lock);
EXPORT_SYMBOL(atomic64_lock);
#endif /* ATOMIC_SPINLOCK */
|
||||
@@ -0,0 +1,461 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Condition Variable Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/condvar.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/compiler_compat.h>
|
||||
#include <linux/mod_compat.h>
|
||||
|
||||
#include <linux/sched.h>
|
||||
|
||||
#ifdef HAVE_SCHED_SIGNAL_HEADER
|
||||
#include <linux/sched/signal.h>
|
||||
#endif
|
||||
|
||||
#define MAX_HRTIMEOUT_SLACK_US 1000
|
||||
unsigned int spl_schedule_hrtimeout_slack_us = 0;
|
||||
|
||||
static int
|
||||
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
|
||||
{
|
||||
unsigned long val;
|
||||
int error;
|
||||
|
||||
error = kstrtoul(buf, 0, &val);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (val > MAX_HRTIMEOUT_SLACK_US)
|
||||
return (-EINVAL);
|
||||
|
||||
error = param_set_uint(buf, kp);
|
||||
if (error < 0)
|
||||
return (error);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
|
||||
param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
|
||||
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
|
||||
"schedule_hrtimeout_range() delta/slack value in us, default(0)");
|
||||
|
||||
void
|
||||
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
|
||||
{
|
||||
ASSERT(cvp);
|
||||
ASSERT(name == NULL);
|
||||
ASSERT(type == CV_DEFAULT);
|
||||
ASSERT(arg == NULL);
|
||||
|
||||
cvp->cv_magic = CV_MAGIC;
|
||||
init_waitqueue_head(&cvp->cv_event);
|
||||
init_waitqueue_head(&cvp->cv_destroy);
|
||||
atomic_set(&cvp->cv_waiters, 0);
|
||||
atomic_set(&cvp->cv_refs, 1);
|
||||
cvp->cv_mutex = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_init);
|
||||
|
||||
static int
|
||||
cv_destroy_wakeup(kcondvar_t *cvp)
|
||||
{
|
||||
if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
|
||||
ASSERT(cvp->cv_mutex == NULL);
|
||||
ASSERT(!waitqueue_active(&cvp->cv_event));
|
||||
return (1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
__cv_destroy(kcondvar_t *cvp)
|
||||
{
|
||||
ASSERT(cvp);
|
||||
ASSERT(cvp->cv_magic == CV_MAGIC);
|
||||
|
||||
cvp->cv_magic = CV_DESTROY;
|
||||
atomic_dec(&cvp->cv_refs);
|
||||
|
||||
/* Block until all waiters are woken and references dropped. */
|
||||
while (cv_destroy_wakeup(cvp) == 0)
|
||||
wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);
|
||||
|
||||
ASSERT3P(cvp->cv_mutex, ==, NULL);
|
||||
ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
|
||||
ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
|
||||
ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_destroy);
|
||||
|
||||
static void
|
||||
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
kmutex_t *m;
|
||||
|
||||
ASSERT(cvp);
|
||||
ASSERT(mp);
|
||||
ASSERT(cvp->cv_magic == CV_MAGIC);
|
||||
ASSERT(mutex_owned(mp));
|
||||
atomic_inc(&cvp->cv_refs);
|
||||
|
||||
m = READ_ONCE(cvp->cv_mutex);
|
||||
if (!m)
|
||||
m = xchg(&cvp->cv_mutex, mp);
|
||||
/* Ensure the same mutex is used by all callers */
|
||||
ASSERT(m == NULL || m == mp);
|
||||
|
||||
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
|
||||
atomic_inc(&cvp->cv_waiters);
|
||||
|
||||
/*
|
||||
* Mutex should be dropped after prepare_to_wait() this
|
||||
* ensures we're linked in to the waiters list and avoids the
|
||||
* race where 'cvp->cv_waiters > 0' but the list is empty.
|
||||
*/
|
||||
mutex_exit(mp);
|
||||
if (io)
|
||||
io_schedule();
|
||||
else
|
||||
schedule();
|
||||
|
||||
/* No more waiters a different mutex could be used */
|
||||
if (atomic_dec_and_test(&cvp->cv_waiters)) {
|
||||
/*
|
||||
* This is set without any lock, so it's racy. But this is
|
||||
* just for debug anyway, so make it best-effort
|
||||
*/
|
||||
cvp->cv_mutex = NULL;
|
||||
wake_up(&cvp->cv_destroy);
|
||||
}
|
||||
|
||||
finish_wait(&cvp->cv_event, &wait);
|
||||
atomic_dec(&cvp->cv_refs);
|
||||
|
||||
/*
|
||||
* Hold mutex after we release the cvp, otherwise we could dead lock
|
||||
* with a thread holding the mutex and call cv_destroy.
|
||||
*/
|
||||
mutex_enter(mp);
|
||||
}
|
||||
|
||||
void
|
||||
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
|
||||
{
|
||||
cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_wait);
|
||||
|
||||
void
|
||||
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
|
||||
{
|
||||
cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_wait_io);
|
||||
|
||||
int
|
||||
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
|
||||
{
|
||||
cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);
|
||||
|
||||
return (signal_pending(current) ? 0 : 1);
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_wait_io_sig);
|
||||
|
||||
int
|
||||
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
|
||||
{
|
||||
cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
|
||||
|
||||
return (signal_pending(current) ? 0 : 1);
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_wait_sig);
|
||||
|
||||
#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
|
||||
#define spl_io_schedule_timeout(t) io_schedule_timeout(t)
|
||||
#else
|
||||
|
||||
struct spl_task_timer {
|
||||
struct timer_list timer;
|
||||
struct task_struct *task;
|
||||
};
|
||||
|
||||
static void
|
||||
__cv_wakeup(spl_timer_list_t t)
|
||||
{
|
||||
struct timer_list *tmr = (struct timer_list *)t;
|
||||
struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);
|
||||
|
||||
wake_up_process(task_timer->task);
|
||||
}
|
||||
|
||||
static long
|
||||
spl_io_schedule_timeout(long time_left)
|
||||
{
|
||||
long expire_time = jiffies + time_left;
|
||||
struct spl_task_timer task_timer;
|
||||
struct timer_list *timer = &task_timer.timer;
|
||||
|
||||
task_timer.task = current;
|
||||
|
||||
timer_setup(timer, __cv_wakeup, 0);
|
||||
|
||||
timer->expires = expire_time;
|
||||
add_timer(timer);
|
||||
|
||||
io_schedule();
|
||||
|
||||
del_timer_sync(timer);
|
||||
|
||||
time_left = expire_time - jiffies;
|
||||
|
||||
return (time_left < 0 ? 0 : time_left);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 'expire_time' argument is an absolute wall clock time in jiffies.
|
||||
* Return value is time left (expire_time - now) or -1 if timeout occurred.
|
||||
*/
|
||||
static clock_t
|
||||
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
|
||||
int state, int io)
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
kmutex_t *m;
|
||||
clock_t time_left;
|
||||
|
||||
ASSERT(cvp);
|
||||
ASSERT(mp);
|
||||
ASSERT(cvp->cv_magic == CV_MAGIC);
|
||||
ASSERT(mutex_owned(mp));
|
||||
|
||||
/* XXX - Does not handle jiffie wrap properly */
|
||||
time_left = expire_time - jiffies;
|
||||
if (time_left <= 0)
|
||||
return (-1);
|
||||
|
||||
atomic_inc(&cvp->cv_refs);
|
||||
m = READ_ONCE(cvp->cv_mutex);
|
||||
if (!m)
|
||||
m = xchg(&cvp->cv_mutex, mp);
|
||||
/* Ensure the same mutex is used by all callers */
|
||||
ASSERT(m == NULL || m == mp);
|
||||
|
||||
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
|
||||
atomic_inc(&cvp->cv_waiters);
|
||||
|
||||
/*
|
||||
* Mutex should be dropped after prepare_to_wait() this
|
||||
* ensures we're linked in to the waiters list and avoids the
|
||||
* race where 'cvp->cv_waiters > 0' but the list is empty.
|
||||
*/
|
||||
mutex_exit(mp);
|
||||
if (io)
|
||||
time_left = spl_io_schedule_timeout(time_left);
|
||||
else
|
||||
time_left = schedule_timeout(time_left);
|
||||
|
||||
/* No more waiters a different mutex could be used */
|
||||
if (atomic_dec_and_test(&cvp->cv_waiters)) {
|
||||
/*
|
||||
* This is set without any lock, so it's racy. But this is
|
||||
* just for debug anyway, so make it best-effort
|
||||
*/
|
||||
cvp->cv_mutex = NULL;
|
||||
wake_up(&cvp->cv_destroy);
|
||||
}
|
||||
|
||||
finish_wait(&cvp->cv_event, &wait);
|
||||
atomic_dec(&cvp->cv_refs);
|
||||
|
||||
/*
|
||||
* Hold mutex after we release the cvp, otherwise we could dead lock
|
||||
* with a thread holding the mutex and call cv_destroy.
|
||||
*/
|
||||
mutex_enter(mp);
|
||||
return (time_left > 0 ? time_left : -1);
|
||||
}
|
||||
|
||||
clock_t
|
||||
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
|
||||
{
|
||||
return (__cv_timedwait_common(cvp, mp, exp_time,
|
||||
TASK_UNINTERRUPTIBLE, 0));
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_timedwait);
|
||||
|
||||
clock_t
|
||||
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
|
||||
{
|
||||
return (__cv_timedwait_common(cvp, mp, exp_time,
|
||||
TASK_UNINTERRUPTIBLE, 1));
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_timedwait_io);
|
||||
|
||||
clock_t
|
||||
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
|
||||
{
|
||||
return (__cv_timedwait_common(cvp, mp, exp_time,
|
||||
TASK_INTERRUPTIBLE, 0));
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_timedwait_sig);
|
||||
|
||||
/*
|
||||
* 'expire_time' argument is an absolute clock time in nanoseconds.
|
||||
* Return value is time left (expire_time - now) or -1 if timeout occurred.
|
||||
*/
|
||||
static clock_t
|
||||
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
|
||||
hrtime_t res, int state)
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
kmutex_t *m;
|
||||
hrtime_t time_left;
|
||||
ktime_t ktime_left;
|
||||
u64 slack = 0;
|
||||
|
||||
ASSERT(cvp);
|
||||
ASSERT(mp);
|
||||
ASSERT(cvp->cv_magic == CV_MAGIC);
|
||||
ASSERT(mutex_owned(mp));
|
||||
|
||||
time_left = expire_time - gethrtime();
|
||||
if (time_left <= 0)
|
||||
return (-1);
|
||||
|
||||
atomic_inc(&cvp->cv_refs);
|
||||
m = READ_ONCE(cvp->cv_mutex);
|
||||
if (!m)
|
||||
m = xchg(&cvp->cv_mutex, mp);
|
||||
/* Ensure the same mutex is used by all callers */
|
||||
ASSERT(m == NULL || m == mp);
|
||||
|
||||
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
|
||||
atomic_inc(&cvp->cv_waiters);
|
||||
|
||||
/*
|
||||
* Mutex should be dropped after prepare_to_wait() this
|
||||
* ensures we're linked in to the waiters list and avoids the
|
||||
* race where 'cvp->cv_waiters > 0' but the list is empty.
|
||||
*/
|
||||
mutex_exit(mp);
|
||||
|
||||
ktime_left = ktime_set(0, time_left);
|
||||
slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
|
||||
MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
|
||||
schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);
|
||||
|
||||
/* No more waiters a different mutex could be used */
|
||||
if (atomic_dec_and_test(&cvp->cv_waiters)) {
|
||||
/*
|
||||
* This is set without any lock, so it's racy. But this is
|
||||
* just for debug anyway, so make it best-effort
|
||||
*/
|
||||
cvp->cv_mutex = NULL;
|
||||
wake_up(&cvp->cv_destroy);
|
||||
}
|
||||
|
||||
finish_wait(&cvp->cv_event, &wait);
|
||||
atomic_dec(&cvp->cv_refs);
|
||||
|
||||
mutex_enter(mp);
|
||||
time_left = expire_time - gethrtime();
|
||||
return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
|
||||
*/
|
||||
static clock_t
|
||||
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
|
||||
hrtime_t res, int flag, int state)
|
||||
{
|
||||
if (!(flag & CALLOUT_FLAG_ABSOLUTE))
|
||||
tim += gethrtime();
|
||||
|
||||
return (__cv_timedwait_hires(cvp, mp, tim, res, state));
|
||||
}
|
||||
|
||||
clock_t
|
||||
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
|
||||
int flag)
|
||||
{
|
||||
return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
|
||||
TASK_UNINTERRUPTIBLE));
|
||||
}
|
||||
EXPORT_SYMBOL(cv_timedwait_hires);
|
||||
|
||||
clock_t
|
||||
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
|
||||
hrtime_t res, int flag)
|
||||
{
|
||||
return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
|
||||
TASK_INTERRUPTIBLE));
|
||||
}
|
||||
EXPORT_SYMBOL(cv_timedwait_sig_hires);
|
||||
|
||||
void
|
||||
__cv_signal(kcondvar_t *cvp)
|
||||
{
|
||||
ASSERT(cvp);
|
||||
ASSERT(cvp->cv_magic == CV_MAGIC);
|
||||
atomic_inc(&cvp->cv_refs);
|
||||
|
||||
/*
|
||||
* All waiters are added with WQ_FLAG_EXCLUSIVE so only one
|
||||
* waiter will be set runnable with each call to wake_up().
|
||||
* Additionally wake_up() holds a spin_lock associated with
|
||||
* the wait queue to ensure we don't race waking up processes.
|
||||
*/
|
||||
if (atomic_read(&cvp->cv_waiters) > 0)
|
||||
wake_up(&cvp->cv_event);
|
||||
|
||||
atomic_dec(&cvp->cv_refs);
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_signal);
|
||||
|
||||
void
|
||||
__cv_broadcast(kcondvar_t *cvp)
|
||||
{
|
||||
ASSERT(cvp);
|
||||
ASSERT(cvp->cv_magic == CV_MAGIC);
|
||||
atomic_inc(&cvp->cv_refs);
|
||||
|
||||
/*
|
||||
* Wake_up_all() will wake up all waiters even those which
|
||||
* have the WQ_FLAG_EXCLUSIVE flag set.
|
||||
*/
|
||||
if (atomic_read(&cvp->cv_waiters) > 0)
|
||||
wake_up_all(&cvp->cv_event);
|
||||
|
||||
atomic_dec(&cvp->cv_refs);
|
||||
}
|
||||
EXPORT_SYMBOL(__cv_broadcast);
|
||||
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Credential Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/cred.h>
|
||||
|
||||
static int
|
||||
#ifdef HAVE_KUIDGID_T
|
||||
cr_groups_search(const struct group_info *group_info, kgid_t grp)
|
||||
#else
|
||||
cr_groups_search(const struct group_info *group_info, gid_t grp)
|
||||
#endif
|
||||
{
|
||||
unsigned int left, right, mid;
|
||||
int cmp;
|
||||
|
||||
if (!group_info)
|
||||
return (0);
|
||||
|
||||
left = 0;
|
||||
right = group_info->ngroups;
|
||||
while (left < right) {
|
||||
mid = (left + right) / 2;
|
||||
cmp = KGID_TO_SGID(grp) -
|
||||
KGID_TO_SGID(GROUP_AT(group_info, mid));
|
||||
|
||||
if (cmp > 0)
|
||||
left = mid + 1;
|
||||
else if (cmp < 0)
|
||||
right = mid;
|
||||
else
|
||||
return (1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Hold a reference on the credential */
|
||||
void
|
||||
crhold(cred_t *cr)
|
||||
{
|
||||
(void) get_cred((const cred_t *)cr);
|
||||
}
|
||||
|
||||
/* Free a reference on the credential */
|
||||
void
|
||||
crfree(cred_t *cr)
|
||||
{
|
||||
put_cred((const cred_t *)cr);
|
||||
}
|
||||
|
||||
/* Return the number of supplemental groups */
|
||||
int
|
||||
crgetngroups(const cred_t *cr)
|
||||
{
|
||||
struct group_info *gi;
|
||||
int rc;
|
||||
|
||||
gi = cr->group_info;
|
||||
rc = gi->ngroups;
|
||||
#ifndef HAVE_GROUP_INFO_GID
|
||||
/*
|
||||
* For Linux <= 4.8,
|
||||
* crgetgroups will only returns gi->blocks[0], which contains only
|
||||
* the first NGROUPS_PER_BLOCK groups.
|
||||
*/
|
||||
if (rc > NGROUPS_PER_BLOCK) {
|
||||
WARN_ON_ONCE(1);
|
||||
rc = NGROUPS_PER_BLOCK;
|
||||
}
|
||||
#endif
|
||||
return (rc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return an array of supplemental gids. The returned address is safe
|
||||
* to use as long as the caller has taken a reference with crhold().
|
||||
*
|
||||
* Linux 4.9 API change, group_info changed from 2d array via ->blocks to 1d
|
||||
* array via ->gid.
|
||||
*/
|
||||
gid_t *
|
||||
crgetgroups(const cred_t *cr)
|
||||
{
|
||||
struct group_info *gi;
|
||||
gid_t *gids = NULL;
|
||||
|
||||
gi = cr->group_info;
|
||||
#ifdef HAVE_GROUP_INFO_GID
|
||||
gids = KGIDP_TO_SGIDP(gi->gid);
|
||||
#else
|
||||
if (gi->nblocks > 0)
|
||||
gids = KGIDP_TO_SGIDP(gi->blocks[0]);
|
||||
#endif
|
||||
return (gids);
|
||||
}
|
||||
|
||||
/* Check if the passed gid is available in supplied credential. */
|
||||
int
|
||||
groupmember(gid_t gid, const cred_t *cr)
|
||||
{
|
||||
struct group_info *gi;
|
||||
int rc;
|
||||
|
||||
gi = cr->group_info;
|
||||
rc = cr_groups_search(gi, SGID_TO_KGID(gid));
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
/* Return the effective user id */
|
||||
uid_t
|
||||
crgetuid(const cred_t *cr)
|
||||
{
|
||||
return (KUID_TO_SUID(cr->euid));
|
||||
}
|
||||
|
||||
/* Return the real user id */
|
||||
uid_t
|
||||
crgetruid(const cred_t *cr)
|
||||
{
|
||||
return (KUID_TO_SUID(cr->uid));
|
||||
}
|
||||
|
||||
/* Return the saved user id */
|
||||
uid_t
|
||||
crgetsuid(const cred_t *cr)
|
||||
{
|
||||
return (KUID_TO_SUID(cr->suid));
|
||||
}
|
||||
|
||||
/* Return the filesystem user id */
|
||||
uid_t
|
||||
crgetfsuid(const cred_t *cr)
|
||||
{
|
||||
return (KUID_TO_SUID(cr->fsuid));
|
||||
}
|
||||
|
||||
/* Return the effective group id */
|
||||
gid_t
|
||||
crgetgid(const cred_t *cr)
|
||||
{
|
||||
return (KGID_TO_SGID(cr->egid));
|
||||
}
|
||||
|
||||
/* Return the real group id */
|
||||
gid_t
|
||||
crgetrgid(const cred_t *cr)
|
||||
{
|
||||
return (KGID_TO_SGID(cr->gid));
|
||||
}
|
||||
|
||||
/* Return the saved group id */
|
||||
gid_t
|
||||
crgetsgid(const cred_t *cr)
|
||||
{
|
||||
return (KGID_TO_SGID(cr->sgid));
|
||||
}
|
||||
|
||||
/* Return the filesystem group id */
|
||||
gid_t
|
||||
crgetfsgid(const cred_t *cr)
|
||||
{
|
||||
return (KGID_TO_SGID(cr->fsgid));
|
||||
}
|
||||
|
||||
/* Reference counting */
EXPORT_SYMBOL(crhold);
EXPORT_SYMBOL(crfree);

/* User id accessors */
EXPORT_SYMBOL(crgetuid);
EXPORT_SYMBOL(crgetruid);
EXPORT_SYMBOL(crgetsuid);
EXPORT_SYMBOL(crgetfsuid);

/* Group id accessors */
EXPORT_SYMBOL(crgetgid);
EXPORT_SYMBOL(crgetrgid);
EXPORT_SYMBOL(crgetsgid);
EXPORT_SYMBOL(crgetfsgid);

/* Supplemental groups */
EXPORT_SYMBOL(crgetngroups);
EXPORT_SYMBOL(crgetgroups);
EXPORT_SYMBOL(groupmember);
|
||||
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Error Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/cmn_err.h>
|
||||
|
||||
/*
|
||||
* It is often useful to actually have the panic crash the node so you
|
||||
* can then get notified of the event, get the crashdump for later
|
||||
* analysis and other such goodies.
|
||||
* But we would still default to the current default of not to do that.
|
||||
*/
|
||||
/* BEGIN CSTYLED */
|
||||
unsigned int spl_panic_halt;
|
||||
module_param(spl_panic_halt, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures");
|
||||
/* END CSTYLED */
|
||||
|
||||
void
|
||||
spl_dumpstack(void)
|
||||
{
|
||||
printk("Showing stack for process %d\n", current->pid);
|
||||
dump_stack();
|
||||
}
|
||||
EXPORT_SYMBOL(spl_dumpstack);
|
||||
|
||||
int
|
||||
spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
|
||||
{
|
||||
const char *newfile;
|
||||
char msg[MAXMSGLEN];
|
||||
va_list ap;
|
||||
|
||||
newfile = strrchr(file, '/');
|
||||
if (newfile != NULL)
|
||||
newfile = newfile + 1;
|
||||
else
|
||||
newfile = file;
|
||||
|
||||
va_start(ap, fmt);
|
||||
(void) vsnprintf(msg, sizeof (msg), fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
printk(KERN_EMERG "%s", msg);
|
||||
printk(KERN_EMERG "PANIC at %s:%d:%s()\n", newfile, line, func);
|
||||
if (spl_panic_halt)
|
||||
panic("%s", msg);
|
||||
|
||||
spl_dumpstack();
|
||||
|
||||
/* Halt the thread to facilitate further debugging */
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
while (1)
|
||||
schedule();
|
||||
|
||||
/* Unreachable */
|
||||
return (1);
|
||||
}
|
||||
EXPORT_SYMBOL(spl_panic);
|
||||
|
||||
void
|
||||
vcmn_err(int ce, const char *fmt, va_list ap)
|
||||
{
|
||||
char msg[MAXMSGLEN];
|
||||
|
||||
vsnprintf(msg, MAXMSGLEN, fmt, ap);
|
||||
|
||||
switch (ce) {
|
||||
case CE_IGNORE:
|
||||
break;
|
||||
case CE_CONT:
|
||||
printk("%s", msg);
|
||||
break;
|
||||
case CE_NOTE:
|
||||
printk(KERN_NOTICE "NOTICE: %s\n", msg);
|
||||
break;
|
||||
case CE_WARN:
|
||||
printk(KERN_WARNING "WARNING: %s\n", msg);
|
||||
break;
|
||||
case CE_PANIC:
|
||||
printk(KERN_EMERG "PANIC: %s\n", msg);
|
||||
spl_dumpstack();
|
||||
|
||||
/* Halt the thread to facilitate further debugging */
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
while (1)
|
||||
schedule();
|
||||
}
|
||||
} /* vcmn_err() */
|
||||
EXPORT_SYMBOL(vcmn_err);
|
||||
|
||||
/*
 * Variadic front end for vcmn_err(): collects the arguments following
 * 'fmt' and forwards them together with the level 'ce'.
 */
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
} /* cmn_err() */
EXPORT_SYMBOL(cmn_err);
|
||||
@@ -0,0 +1,757 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Generic Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/systeminfo.h>
|
||||
#include <sys/vmsystm.h>
|
||||
#include <sys/kobj.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/tsd.h>
|
||||
#include <sys/zmod.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/kstat.h>
|
||||
#include <sys/file.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <sys/disp.h>
|
||||
#include <sys/random.h>
|
||||
#include <sys/strings.h>
|
||||
#include <linux/kmod.h>
|
||||
#include "zfs_gitrev.h"
|
||||
|
||||
char spl_gitrev[64] = ZFS_META_GITREV;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
unsigned long spl_hostid = 0;
|
||||
EXPORT_SYMBOL(spl_hostid);
|
||||
/* BEGIN CSTYLED */
|
||||
module_param(spl_hostid, ulong, 0644);
|
||||
MODULE_PARM_DESC(spl_hostid, "The system hostid.");
|
||||
/* END CSTYLED */
|
||||
|
||||
proc_t p0;
|
||||
EXPORT_SYMBOL(p0);
|
||||
|
||||
/*
|
||||
* Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
|
||||
*
|
||||
* "Further scramblings of Marsaglia's xorshift generators"
|
||||
* http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
|
||||
*
|
||||
* random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
|
||||
* is to provide bytes containing random numbers. It is mapped to /dev/urandom
|
||||
* on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's
|
||||
* random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so
|
||||
* we can implement it using a fast PRNG that we seed using Linux' actual
|
||||
* equivalent to random_get_pseudo_bytes(). We do this by providing each CPU
|
||||
* with an independent seed so that all calls to random_get_pseudo_bytes() are
|
||||
* free of atomic instructions.
|
||||
*
|
||||
* A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
|
||||
* to generate words larger than 128 bits will paradoxically be limited to
|
||||
* `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
|
||||
* 128-bit words and selecting the first will implicitly select the second. If
|
||||
* a caller finds this behavior undesirable, random_get_bytes() should be used
|
||||
* instead.
|
||||
*
|
||||
* XXX: Linux interrupt handlers that trigger within the critical section
|
||||
* formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
|
||||
* see the same numbers. Nothing in the code currently calls this in an
|
||||
* interrupt handler, so this is considered to be okay. If that becomes a
|
||||
* problem, we could create a set of per-cpu variables for interrupt handlers
|
||||
* and use them when in_interrupt() from linux/preempt_mask.h evaluates to
|
||||
* true.
|
||||
*/
|
||||
/* Per-CPU pair of 64-bit words used as state by random_get_pseudo_bytes() */
static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy);
|
||||
|
||||
/*
|
||||
* spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
|
||||
* file:
|
||||
*
|
||||
* http://xorshift.di.unimi.it/xorshift128plus.c
|
||||
*/
|
||||
|
||||
/*
 * Advance the two-word generator state 's' by one step and return the
 * next 64-bit output, which is the sum of the new s[1] and the new s[0].
 */
static inline uint64_t
spl_rand_next(uint64_t *s)
{
	const uint64_t hi = s[1];
	const uint64_t lo = s[0];
	const uint64_t mixed = lo ^ (lo << 23);
	const uint64_t next = mixed ^ hi ^ (mixed >> 18) ^ (hi >> 5);

	s[0] = hi;
	s[1] = next;

	return (next + hi);
}
|
||||
|
||||
/*
 * Replace the state 's' with the XOR of the intermediate states picked
 * out by the set bits of the 128-bit JUMP constant while stepping the
 * generator 128 times.
 */
static inline void
spl_rand_jump(uint64_t *s)
{
	static const uint64_t JUMP[] =
	    { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
	const int nbits = 64 * (int)(sizeof (JUMP) / sizeof (*JUMP));
	uint64_t acc0 = 0;
	uint64_t acc1 = 0;
	int bit;

	for (bit = 0; bit < nbits; bit++) {
		/* Word bit/64, bit position bit%64, lowest bit first */
		if (JUMP[bit / 64] & (1ULL << (bit % 64))) {
			acc0 ^= s[0];
			acc1 ^= s[1];
		}
		(void) spl_rand_next(s);
	}

	s[0] = acc0;
	s[1] = acc1;
}
|
||||
|
||||
int
|
||||
random_get_pseudo_bytes(uint8_t *ptr, size_t len)
|
||||
{
|
||||
uint64_t *xp, s[2];
|
||||
|
||||
ASSERT(ptr);
|
||||
|
||||
xp = get_cpu_var(spl_pseudo_entropy);
|
||||
|
||||
s[0] = xp[0];
|
||||
s[1] = xp[1];
|
||||
|
||||
while (len) {
|
||||
union {
|
||||
uint64_t ui64;
|
||||
uint8_t byte[sizeof (uint64_t)];
|
||||
}entropy;
|
||||
int i = MIN(len, sizeof (uint64_t));
|
||||
|
||||
len -= i;
|
||||
entropy.ui64 = spl_rand_next(s);
|
||||
|
||||
while (i--)
|
||||
*ptr++ = entropy.byte[i];
|
||||
}
|
||||
|
||||
xp[0] = s[0];
|
||||
xp[1] = s[1];
|
||||
|
||||
put_cpu_var(spl_pseudo_entropy);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
EXPORT_SYMBOL(random_get_pseudo_bytes);
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
/*
|
||||
* Support 64/64 => 64 division on a 32-bit platform. While the kernel
|
||||
* provides a div64_u64() function for this we do not use it because the
|
||||
* implementation is flawed. There are cases which return incorrect
|
||||
* results as late as linux-2.6.35. Until this is fixed upstream the
|
||||
* spl must provide its own implementation.
|
||||
*
|
||||
* This implementation is a slightly modified version of the algorithm
|
||||
* proposed by the book 'Hacker's Delight'. The original source can be
|
||||
* found here and is available for use without restriction.
|
||||
*
|
||||
* http://www.hackersdelight.org/HDcode/newCode/divDouble.c
|
||||
*/
|
||||
|
||||
/*
 * Count the leading zero bits of a 64-bit value; returns 64 for zero.
 *
 * Halving search: whenever the top 'w' bits are all clear, add 'w' to
 * the count and shift them out, for w = 32, 16, 8, 4, 2, 1.
 */
static int
nlz64(uint64_t x)
{
	static const int widths[] = { 32, 16, 8, 4, 2, 1 };
	unsigned int k;
	int zeros = 0;

	if (x == 0)
		return (64);

	for (k = 0; k < sizeof (widths) / sizeof (widths[0]); k++) {
		if ((x >> (64 - widths[k])) == 0) {
			zeros += widths[k];
			x <<= widths[k];
		}
	}

	return (zeros);
}
|
||||
|
||||
/*
 * 64-bit by 32-bit unsigned division built on do_div().  Newer kernels
 * provide div_u64() but a local version keeps things portable between
 * kernel versions.  The remainder returned by do_div() is discarded.
 */
static inline uint64_t
__div_u64(uint64_t u, uint32_t v)
{
	uint64_t quotient = u;

	/* do_div() divides its first argument in place */
	(void) do_div(quotient, v);

	return (quotient);
}
|
||||
|
||||
/*
 * 64-bit unsigned division for 32-bit machines.
 *
 * A divisor that fits in 32 bits is handled first.  If the upper half
 * of the dividend is smaller than the divisor a single do_div() is
 * enough because the quotient cannot overflow; otherwise two divisions
 * are performed using the grade school method.  For larger divisors
 * the divisor is normalized, an estimate of the quotient is computed
 * and then corrected by at most one.
 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t hi, lo, rem, q_hi, q_lo, vnorm, quot;
	int shift;

	if ((v >> 32) == 0) {
		/* Divisor below 2**32 */
		if ((u >> 32) < v)
			return (__div_u64(u, v));

		/* u/v would overflow 32 bits: divide the halves in turn */
		hi = u >> 32;
		lo = u & 0xFFFFFFFF;
		q_hi = __div_u64(hi, v);	/* first quotient digit */
		rem = hi - q_hi * v;		/* first remainder, < v */
		lo += (rem << 32);
		q_lo = __div_u64(lo, v);	/* second quotient digit */

		return ((q_hi << 32) + q_lo);
	}

	/* Divisor of at least 2**32, so 0 <= shift <= 31 */
	shift = nlz64(v);
	vnorm = (v << shift) >> 32;		/* normalized, MSB is 1 */
	quot = __div_u64(u >> 1, vnorm);	/* u halved to avoid overflow */
	quot = (quot << shift) >> 31;		/* undo both adjustments */

	/* The estimate is now exact or one too large; step down ... */
	if (quot != 0)
		quot--;
	/* ... and back up again if that made it too small */
	if ((u - quot * v) >= v)
		quot++;

	return (quot);
}
EXPORT_SYMBOL(__udivdi3);
|
||||
|
||||
/* BEGIN CSTYLED */
#ifndef abs64
#define	abs64(x)	({ uint64_t t = (x) >> 63; ((x) ^ t) - t; })
#endif
/* END CSTYLED */

/*
 * 64-bit signed division for 32-bit machines: divide the magnitudes
 * with __udivdi3() and negate the quotient when the operands have
 * different signs.
 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	int64_t quot, sign;

	quot = __udivdi3(abs64(u), abs64(v));
	/* All ones when the signs of u and v differ, otherwise zero */
	sign = (u ^ v) >> 63;

	return ((quot ^ sign) - sign);
}
EXPORT_SYMBOL(__divdi3);
|
||||
|
||||
/*
 * 64-bit unsigned modulo for 32-bit machines, derived from the
 * quotient computed by __udivdi3().
 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	uint64_t quot = __udivdi3(dividend, divisor);

	return (dividend - (divisor * quot));
}
EXPORT_SYMBOL(__umoddi3);
|
||||
|
||||
/*
 * 64-bit unsigned division/modulo for 32-bit machines.  Returns the
 * quotient n / d and, when 'r' is not NULL, stores the remainder in *r.
 */
uint64_t
__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
{
	uint64_t quot = __udivdi3(n, d);

	if (r != NULL)
		*r = n - d * quot;

	return (quot);
}
EXPORT_SYMBOL(__udivmoddi4);
|
||||
|
||||
/*
|
||||
* Implementation of 64-bit signed division/modulo for 32-bit machines.
|
||||
*/
|
||||
int64_t
|
||||
__divmoddi4(int64_t n, int64_t d, int64_t *r)
|
||||
{
|
||||
int64_t q, rr;
|
||||
boolean_t nn = B_FALSE;
|
||||
boolean_t nd = B_FALSE;
|
||||
if (n < 0) {
|
||||
nn = B_TRUE;
|
||||
n = -n;
|
||||
}
|
||||
if (d < 0) {
|
||||
nd = B_TRUE;
|
||||
d = -d;
|
||||
}
|
||||
|
||||
q = __udivmoddi4(n, d, (uint64_t *)&rr);
|
||||
|
||||
if (nn != nd)
|
||||
q = -q;
|
||||
if (nn)
|
||||
rr = -rr;
|
||||
if (r)
|
||||
*r = rr;
|
||||
return (q);
|
||||
}
|
||||
EXPORT_SYMBOL(__divmoddi4);
|
||||
|
||||
#if defined(__arm) || defined(__arm__)
|
||||
/*
|
||||
* Implementation of 64-bit (un)signed division for 32-bit arm machines.
|
||||
*
|
||||
* Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned)
|
||||
* long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
|
||||
* and the remainder in {r2, r3}. The return type is specifically left
|
||||
* set to 'void' to ensure the compiler does not overwrite these registers
|
||||
* during the return. All results are in registers as per ABI
|
||||
*/
|
||||
void
|
||||
__aeabi_uldivmod(uint64_t u, uint64_t v)
|
||||
{
|
||||
uint64_t res;
|
||||
uint64_t mod;
|
||||
|
||||
res = __udivdi3(u, v);
|
||||
mod = __umoddi3(u, v);
|
||||
{
|
||||
register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
|
||||
register uint32_t r1 asm("r1") = (res >> 32);
|
||||
register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
|
||||
register uint32_t r3 asm("r3") = (mod >> 32);
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
asm volatile(""
|
||||
: "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */
|
||||
: "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
|
||||
/* END CSTYLED */
|
||||
|
||||
return; /* r0; */
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(__aeabi_uldivmod);
|
||||
|
||||
void
|
||||
__aeabi_ldivmod(int64_t u, int64_t v)
|
||||
{
|
||||
int64_t res;
|
||||
uint64_t mod;
|
||||
|
||||
res = __divdi3(u, v);
|
||||
mod = __umoddi3(u, v);
|
||||
{
|
||||
register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
|
||||
register uint32_t r1 asm("r1") = (res >> 32);
|
||||
register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
|
||||
register uint32_t r3 asm("r3") = (mod >> 32);
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
asm volatile(""
|
||||
: "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */
|
||||
: "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
|
||||
/* END CSTYLED */
|
||||
|
||||
return; /* r0; */
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(__aeabi_ldivmod);
|
||||
#endif /* __arm || __arm__ */
|
||||
#endif /* BITS_PER_LONG */
|
||||
|
||||
/*
|
||||
* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
|
||||
* ddi_strtol(9F) man page. I have not verified the behavior of these
|
||||
* functions against their Solaris counterparts. It is possible that I
|
||||
* may have misinterpreted the man page or the man page is incorrect.
|
||||
*/
|
||||
int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);

/*
 * define_ddi_strtoux(type, valtype) generates ddi_strtou<type>(), which
 * parses an unsigned integer of type 'valtype' from 'str'.
 *
 *   str     string to parse; no leading whitespace or sign is skipped
 *   endptr  if not NULL, receives a pointer to the first character
 *           that was not consumed
 *   base    2..36, or 0 to auto-detect: "0x"/"0X" followed by a hex
 *           digit selects 16, any other leading '0' selects 8 and
 *           everything else selects 10
 *   result  receives the parsed value (0 if no digit was consumed)
 *
 * Returns 0 on success, EINVAL for an empty string or an unsupported
 * base, and ERANGE if the value does not fit in 'valtype'.  On error
 * neither *result nor *endptr is written.
 */
#define	define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	const valtype max_value = (valtype)-1;				\
	valtype value = 0;						\
	const char *ptr = str;						\
	int digit;							\
									\
	if (*str == '\0')						\
		return (EINVAL);					\
	if (base < 0 || base == 1 || base > 36)				\
		return (EINVAL);					\
									\
	/* Auto-detect base based on prefix */				\
	if (base == 0) {						\
		if (str[0] != '0') {					\
			base = 10;	/* decimal */			\
		} else if (tolower((unsigned char)str[1]) == 'x' &&	\
		    isxdigit((unsigned char)str[2])) {			\
			base = 16;	/* hex */			\
			ptr += 2;					\
		} else {						\
			/* The leading '0' is parsed as a digit */	\
			base = 8;	/* octal */			\
		}							\
	}								\
									\
	for (;;) {							\
		unsigned char c = (unsigned char)*ptr;			\
									\
		if (isdigit(c))						\
			digit = c - '0';				\
		else if (isalpha(c))					\
			digit = tolower(c) - 'a' + 10;			\
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		/* Check before multiplying so no wrap goes unnoticed */ \
		if (value > (max_value - (valtype)digit) / (valtype)base) \
			return (ERANGE);				\
									\
		value = value * (valtype)base + (valtype)digit;		\
		ptr++;							\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = (char *)ptr;					\
									\
	return (0);							\
}

/*
 * define_ddi_strtox(type, valtype) generates ddi_strto<type>(), the
 * signed counterpart of ddi_strtou<type>().  A single leading '-' is
 * accepted and negates the parsed magnitude; a '-' that is not
 * followed by any digit consumes nothing, so *endptr is set to 'str'.
 * 'endptr' may be NULL.  Magnitudes beyond the signed range are
 * converted from the unsigned value rather than rejected.
 */
#define	define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	unsigned valtype magnitude = 0;					\
	char *end = NULL;						\
	int negative = (*str == '-');					\
	int rc;								\
									\
	rc = ddi_strtou##type(str + negative, &end, base, &magnitude);	\
	if (rc != 0)							\
		return (rc);						\
									\
	if (negative) {							\
		if (end == str + 1)					\
			end = (char *)str;				\
		else							\
			magnitude = 0 - magnitude;			\
	}								\
									\
	*result = (valtype)magnitude;					\
	if (endptr)							\
		*endptr = end;						\
									\
	return (0);							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
|
||||
|
||||
EXPORT_SYMBOL(ddi_strtoul);
|
||||
EXPORT_SYMBOL(ddi_strtol);
|
||||
EXPORT_SYMBOL(ddi_strtoll);
|
||||
EXPORT_SYMBOL(ddi_strtoull);
|
||||
|
||||
int
|
||||
ddi_copyin(const void *from, void *to, size_t len, int flags)
|
||||
{
|
||||
/* Fake ioctl() issued by kernel, 'from' is a kernel address */
|
||||
if (flags & FKIOCTL) {
|
||||
memcpy(to, from, len);
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (copyin(from, to, len));
|
||||
}
|
||||
EXPORT_SYMBOL(ddi_copyin);
|
||||
|
||||
int
|
||||
ddi_copyout(const void *from, void *to, size_t len, int flags)
|
||||
{
|
||||
/* Fake ioctl() issued by kernel, 'from' is a kernel address */
|
||||
if (flags & FKIOCTL) {
|
||||
memcpy(to, from, len);
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (copyout(from, to, len));
|
||||
}
|
||||
EXPORT_SYMBOL(ddi_copyout);
|
||||
|
||||
/*
|
||||
* Read the unique system identifier from the /etc/hostid file.
|
||||
*
|
||||
* The behavior of /usr/bin/hostid on Linux systems with the
|
||||
* regular eglibc and coreutils is:
|
||||
*
|
||||
* 1. Generate the value if the /etc/hostid file does not exist
|
||||
* or if the /etc/hostid file is less than four bytes in size.
|
||||
*
|
||||
* 2. If the /etc/hostid file is at least 4 bytes, then return
|
||||
* the first four bytes [0..3] in native endian order.
|
||||
*
|
||||
* 3. Always ignore bytes [4..] if they exist in the file.
|
||||
*
|
||||
* Only the first four bytes are significant, even on systems that
|
||||
* have a 64-bit word size.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* eglibc: sysdeps/unix/sysv/linux/gethostid.c
|
||||
* coreutils: src/hostid.c
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
* The /etc/hostid file on Solaris is a text file that often reads:
|
||||
*
|
||||
* # DO NOT EDIT
|
||||
* "0123456789"
|
||||
*
|
||||
* Directly copying this file to Linux results in a constant
|
||||
* hostid of 4f442023 because the default comment constitutes
|
||||
* the first four bytes of the file.
|
||||
*
|
||||
*/
|
||||
|
||||
char *spl_hostid_path = HW_HOSTID_PATH;
|
||||
module_param(spl_hostid_path, charp, 0444);
|
||||
MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
|
||||
|
||||
static int
|
||||
hostid_read(uint32_t *hostid)
|
||||
{
|
||||
uint64_t size;
|
||||
struct _buf *file;
|
||||
uint32_t value = 0;
|
||||
int error;
|
||||
|
||||
file = kobj_open_file(spl_hostid_path);
|
||||
if (file == (struct _buf *)-1)
|
||||
return (ENOENT);
|
||||
|
||||
error = kobj_get_filesize(file, &size);
|
||||
if (error) {
|
||||
kobj_close_file(file);
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (size < sizeof (HW_HOSTID_MASK)) {
|
||||
kobj_close_file(file);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read directly into the variable like eglibc does.
|
||||
* Short reads are okay; native behavior is preserved.
|
||||
*/
|
||||
error = kobj_read_file(file, (char *)&value, sizeof (value), 0);
|
||||
if (error < 0) {
|
||||
kobj_close_file(file);
|
||||
return (EIO);
|
||||
}
|
||||
|
||||
/* Mask down to 32 bits like coreutils does. */
|
||||
*hostid = (value & HW_HOSTID_MASK);
|
||||
kobj_close_file(file);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the system hostid. Preferentially use the spl_hostid module option
|
||||
* when set, otherwise use the value in the /etc/hostid file.
|
||||
*/
|
||||
uint32_t
|
||||
zone_get_hostid(void *zone)
|
||||
{
|
||||
uint32_t hostid;
|
||||
|
||||
ASSERT3P(zone, ==, NULL);
|
||||
|
||||
if (spl_hostid != 0)
|
||||
return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));
|
||||
|
||||
if (hostid_read(&hostid) == 0)
|
||||
return (hostid);
|
||||
|
||||
return (0);
|
||||
}
|
||||
EXPORT_SYMBOL(zone_get_hostid);
|
||||
|
||||
/*
 * Bring up the kmem and then the vmem subsystems.  If vmem setup fails
 * the already initialized kmem subsystem is torn down again.  Returns 0
 * on success or the first non-zero error encountered.
 */
static int
spl_kvmem_init(void)
{
	int error;

	error = spl_kmem_init();
	if (error != 0)
		return (error);

	error = spl_vmem_init();
	if (error != 0)
		spl_kmem_fini();

	return (error);
}
|
||||
|
||||
/*
|
||||
* We initialize the random number generator with 128 bits of entropy from the
|
||||
* system random number generator. In the improbable case that we have a zero
|
||||
* seed, we fallback to the system jiffies, unless it is also zero, in which
|
||||
* situation we use a preprogrammed seed. We step forward by 2^64 iterations to
|
||||
* initialize each of the per-cpu seeds so that the sequences generated on each
|
||||
* CPU are guaranteed to never overlap in practice.
|
||||
*/
|
||||
static void __init
|
||||
spl_random_init(void)
|
||||
{
|
||||
uint64_t s[2];
|
||||
int i;
|
||||
|
||||
get_random_bytes(s, sizeof (s));
|
||||
|
||||
if (s[0] == 0 && s[1] == 0) {
|
||||
if (jiffies != 0) {
|
||||
s[0] = jiffies;
|
||||
s[1] = ~0 - jiffies;
|
||||
} else {
|
||||
(void) memcpy(s, "improbable seed", sizeof (s));
|
||||
}
|
||||
printk("SPL: get_random_bytes() returned 0 "
|
||||
"when generating random seed. Setting initial seed to "
|
||||
"0x%016llx%016llx.\n", cpu_to_be64(s[0]),
|
||||
cpu_to_be64(s[1]));
|
||||
}
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
uint64_t *wordp = per_cpu(spl_pseudo_entropy, i);
|
||||
|
||||
spl_rand_jump(s);
|
||||
|
||||
wordp[0] = s[0];
|
||||
wordp[1] = s[1];
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Tear down the memory subsystems in the reverse order of
 * spl_kvmem_init(): vmem first, then kmem.
 */
static void
spl_kvmem_fini(void)
{
	spl_vmem_fini();
	spl_kmem_fini();
}
|
||||
|
||||
static int __init
|
||||
spl_init(void)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
bzero(&p0, sizeof (proc_t));
|
||||
spl_random_init();
|
||||
|
||||
if ((rc = spl_kvmem_init()))
|
||||
goto out1;
|
||||
|
||||
if ((rc = spl_tsd_init()))
|
||||
goto out2;
|
||||
|
||||
if ((rc = spl_taskq_init()))
|
||||
goto out3;
|
||||
|
||||
if ((rc = spl_kmem_cache_init()))
|
||||
goto out4;
|
||||
|
||||
if ((rc = spl_vn_init()))
|
||||
goto out5;
|
||||
|
||||
if ((rc = spl_proc_init()))
|
||||
goto out6;
|
||||
|
||||
if ((rc = spl_kstat_init()))
|
||||
goto out7;
|
||||
|
||||
if ((rc = spl_zlib_init()))
|
||||
goto out8;
|
||||
|
||||
return (rc);
|
||||
|
||||
out8:
|
||||
spl_kstat_fini();
|
||||
out7:
|
||||
spl_proc_fini();
|
||||
out6:
|
||||
spl_vn_fini();
|
||||
out5:
|
||||
spl_kmem_cache_fini();
|
||||
out4:
|
||||
spl_taskq_fini();
|
||||
out3:
|
||||
spl_tsd_fini();
|
||||
out2:
|
||||
spl_kvmem_fini();
|
||||
out1:
|
||||
return (rc);
|
||||
}
|
||||
|
||||
static void __exit
|
||||
spl_fini(void)
|
||||
{
|
||||
spl_zlib_fini();
|
||||
spl_kstat_fini();
|
||||
spl_proc_fini();
|
||||
spl_vn_fini();
|
||||
spl_kmem_cache_fini();
|
||||
spl_taskq_fini();
|
||||
spl_tsd_fini();
|
||||
spl_kvmem_fini();
|
||||
}
|
||||
|
||||
module_init(spl_init);
|
||||
module_exit(spl_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Solaris Porting Layer");
|
||||
MODULE_AUTHOR(ZFS_META_AUTHOR);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,556 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <sys/debug.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
/*
|
||||
* As a general rule kmem_alloc() allocations should be small, preferably
|
||||
* just a few pages since they must be physically contiguous. Therefore, a
|
||||
* rate limited warning will be printed to the console for any kmem_alloc()
|
||||
* which exceeds a reasonable threshold.
|
||||
*
|
||||
* The default warning threshold is set to sixteen pages but capped at 64K to
|
||||
* accommodate systems using large pages. This value was selected to be small
|
||||
* enough to ensure the largest allocations are quickly noticed and fixed.
|
||||
* But large enough to avoid logging any warnings when an allocation size is
|
||||
* larger than optimal but not a serious concern. Since this value is tunable,
|
||||
* developers are encouraged to set it lower when testing so any new largish
|
||||
* allocations are quickly caught. These warnings may be disabled by setting
|
||||
* the threshold to zero.
|
||||
*/
|
||||
/* BEGIN CSTYLED */
|
||||
unsigned int spl_kmem_alloc_warn = MIN(16 * PAGE_SIZE, 64 * 1024);
|
||||
module_param(spl_kmem_alloc_warn, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_kmem_alloc_warn,
|
||||
"Warning threshold in bytes for a kmem_alloc()");
|
||||
EXPORT_SYMBOL(spl_kmem_alloc_warn);
|
||||
|
||||
/*
|
||||
* Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
|
||||
* Allocations which are marginally smaller than this limit may succeed but
|
||||
* should still be avoided due to the expense of locating a contiguous range
|
||||
* of free pages. Therefore, a maximum kmem size with reasonable safety
|
||||
* margin of 4x is set. Kmem_alloc() allocations larger than this maximum
|
||||
* will quickly fail. Vmem_alloc() allocations less than or equal to this
|
||||
* value will use kmalloc(), but shift to vmalloc() when exceeding this value.
|
||||
*/
|
||||
unsigned int spl_kmem_alloc_max = (KMALLOC_MAX_SIZE >> 2);
|
||||
module_param(spl_kmem_alloc_max, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_kmem_alloc_max,
|
||||
"Maximum size in bytes for a kmem_alloc()");
|
||||
EXPORT_SYMBOL(spl_kmem_alloc_max);
|
||||
/* END CSTYLED */
|
||||
|
||||
/*
 * Report whether kmem debugging is active.  This implementation always
 * answers 0.
 */
int
kmem_debugging(void)
{
	const int enabled = 0;

	return (enabled);
}
|
||||
EXPORT_SYMBOL(kmem_debugging);
|
||||
|
||||
char *
|
||||
kmem_vasprintf(const char *fmt, va_list ap)
|
||||
{
|
||||
va_list aq;
|
||||
char *ptr;
|
||||
|
||||
do {
|
||||
va_copy(aq, ap);
|
||||
ptr = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, aq);
|
||||
va_end(aq);
|
||||
} while (ptr == NULL);
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_vasprintf);
|
||||
|
||||
char *
|
||||
kmem_asprintf(const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
char *ptr;
|
||||
|
||||
do {
|
||||
va_start(ap, fmt);
|
||||
ptr = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, ap);
|
||||
va_end(ap);
|
||||
} while (ptr == NULL);
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_asprintf);
|
||||
|
||||
/*
 * Duplicate the NUL-terminated string 'str' into a kmalloc() buffer
 * allocated with the KM_* 'flags' given.  Returns the copy, or NULL if
 * the allocation failed.  The copy is released with kfree().
 */
static char *
__strdup(const char *str, int flags)
{
	/* size_t, not int: strlen() must not be narrowed before the + 1 */
	size_t size = strlen(str) + 1;
	char *ptr;

	ptr = kmalloc(size, kmem_flags_convert(flags));
	if (ptr)
		memcpy(ptr, str, size);

	return (ptr);
}
|
||||
|
||||
char *
|
||||
strdup(const char *str)
|
||||
{
|
||||
return (__strdup(str, KM_SLEEP));
|
||||
}
|
||||
EXPORT_SYMBOL(strdup);
|
||||
|
||||
/*
 * Release a string; the buffer is handed straight to kfree().
 */
void
strfree(char *str)
{
	kfree(str);
}
EXPORT_SYMBOL(strfree);
|
||||
|
||||
/*
|
||||
* General purpose unified implementation of kmem_alloc(). It is an
|
||||
* amalgamation of Linux and Illumos allocator design. It should never be
|
||||
* exported to ensure that code using kmem_alloc()/kmem_zalloc() remains
|
||||
* relatively portable. Consumers may only access this function through
|
||||
* wrappers that enforce the common flags to ensure portability.
|
||||
*/
|
||||
inline void *
|
||||
spl_kmem_alloc_impl(size_t size, int flags, int node)
|
||||
{
|
||||
gfp_t lflags = kmem_flags_convert(flags);
|
||||
int use_vmem = 0;
|
||||
void *ptr;
|
||||
|
||||
/*
|
||||
* Log abnormally large allocations and rate limit the console output.
|
||||
* Allocations larger than spl_kmem_alloc_warn should be performed
|
||||
* through the vmem_alloc()/vmem_zalloc() interfaces.
|
||||
*/
|
||||
if ((spl_kmem_alloc_warn > 0) && (size > spl_kmem_alloc_warn) &&
|
||||
!(flags & KM_VMEM)) {
|
||||
printk(KERN_WARNING
|
||||
"Large kmem_alloc(%lu, 0x%x), please file an issue at:\n"
|
||||
"https://github.com/zfsonlinux/zfs/issues/new\n",
|
||||
(unsigned long)size, flags);
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
/*
|
||||
* Use a loop because kmalloc_node() can fail when GFP_KERNEL is used
|
||||
* unlike kmem_alloc() with KM_SLEEP on Illumos.
|
||||
*/
|
||||
do {
|
||||
/*
|
||||
* Calling kmalloc_node() when the size >= spl_kmem_alloc_max
|
||||
* is unsafe. This must fail for all for kmem_alloc() and
|
||||
* kmem_zalloc() callers.
|
||||
*
|
||||
* For vmem_alloc() and vmem_zalloc() callers it is permissible
|
||||
* to use __vmalloc(). However, in general use of __vmalloc()
|
||||
* is strongly discouraged because a global lock must be
|
||||
* acquired. Contention on this lock can significantly
|
||||
* impact performance so frequently manipulating the virtual
|
||||
* address space is strongly discouraged.
|
||||
*/
|
||||
if ((size > spl_kmem_alloc_max) || use_vmem) {
|
||||
if (flags & KM_VMEM) {
|
||||
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
|
||||
PAGE_KERNEL);
|
||||
} else {
|
||||
return (NULL);
|
||||
}
|
||||
} else {
|
||||
ptr = kmalloc_node(size, lflags, node);
|
||||
}
|
||||
|
||||
if (likely(ptr) || (flags & KM_NOSLEEP))
|
||||
return (ptr);
|
||||
|
||||
/*
|
||||
* For vmem_alloc() and vmem_zalloc() callers retry immediately
|
||||
* using __vmalloc() which is unlikely to fail.
|
||||
*/
|
||||
if ((flags & KM_VMEM) && (use_vmem == 0)) {
|
||||
use_vmem = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use cond_resched() instead of congestion_wait() to avoid
|
||||
* deadlocking systems where there are no block devices.
|
||||
*/
|
||||
cond_resched();
|
||||
} while (1);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
 * Release a buffer obtained from spl_kmem_alloc_impl().  Addresses for
 * which is_vmalloc_addr() is true go to vfree(), everything else to
 * kfree().  'size' is not used here.
 */
inline void
spl_kmem_free_impl(const void *buf, size_t size)
{
	if (!is_vmalloc_addr(buf)) {
		kfree(buf);
		return;
	}

	vfree(buf);
}
|
||||
|
||||
/*
|
||||
* Memory allocation and accounting for kmem_* style allocations. When
|
||||
* DEBUG_KMEM is enabled the total memory allocated will be tracked and
|
||||
* any memory leaked will be reported during module unload.
|
||||
*
|
||||
* ./configure --enable-debug-kmem
|
||||
*/
|
||||
#ifdef DEBUG_KMEM
|
||||
|
||||
/* Shim layer memory accounting */
|
||||
#ifdef HAVE_ATOMIC64_T
|
||||
atomic64_t kmem_alloc_used = ATOMIC64_INIT(0);
|
||||
unsigned long long kmem_alloc_max = 0;
|
||||
#else /* HAVE_ATOMIC64_T */
|
||||
atomic_t kmem_alloc_used = ATOMIC_INIT(0);
|
||||
unsigned long long kmem_alloc_max = 0;
|
||||
#endif /* HAVE_ATOMIC64_T */
|
||||
|
||||
EXPORT_SYMBOL(kmem_alloc_used);
|
||||
EXPORT_SYMBOL(kmem_alloc_max);
|
||||
|
||||
inline void *
|
||||
spl_kmem_alloc_debug(size_t size, int flags, int node)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
ptr = spl_kmem_alloc_impl(size, flags, node);
|
||||
if (ptr) {
|
||||
kmem_alloc_used_add(size);
|
||||
if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
|
||||
kmem_alloc_max = kmem_alloc_used_read();
|
||||
}
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
/*
 * Accounting wrapper around spl_kmem_free_impl(): subtracts 'size'
 * from the running total, then releases the buffer.
 */
inline void
spl_kmem_free_debug(const void *ptr, size_t size)
{
	kmem_alloc_used_sub(size);
	spl_kmem_free_impl(ptr, size);
}
|
||||
|
||||
/*
|
||||
* When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked
|
||||
* but also the location of every alloc and free. When the SPL module is
|
||||
* unloaded a list of all leaked addresses and where they were allocated
|
||||
* will be dumped to the console. Enabling this feature has a significant
|
||||
* impact on performance but it makes finding memory leaks straight forward.
|
||||
*
|
||||
* Not surprisingly with debugging enabled the xmem_locks are very highly
|
||||
* contended particularly on xfree(). If we want to run with this detailed
|
||||
* debugging enabled for anything other than debugging we need to minimize
|
||||
* the contention by moving to a lock per xmem_table entry model.
|
||||
*
|
||||
* ./configure --enable-debug-kmem-tracking
|
||||
*/
|
||||
#ifdef DEBUG_KMEM_TRACKING
|
||||
|
||||
#include <linux/hash.h>
|
||||
#include <linux/ctype.h>
|
||||
|
||||
#define KMEM_HASH_BITS 10
|
||||
#define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS)
|
||||
|
||||
typedef struct kmem_debug {
|
||||
struct hlist_node kd_hlist; /* Hash node linkage */
|
||||
struct list_head kd_list; /* List of all allocations */
|
||||
void *kd_addr; /* Allocation pointer */
|
||||
size_t kd_size; /* Allocation size */
|
||||
const char *kd_func; /* Allocation function */
|
||||
int kd_line; /* Allocation line */
|
||||
} kmem_debug_t;
|
||||
|
||||
static spinlock_t kmem_lock;
|
||||
static struct hlist_head kmem_table[KMEM_TABLE_SIZE];
|
||||
static struct list_head kmem_list;
|
||||
|
||||
static kmem_debug_t *
|
||||
kmem_del_init(spinlock_t *lock, struct hlist_head *table,
|
||||
int bits, const void *addr)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node;
|
||||
struct kmem_debug *p;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
|
||||
head = &table[hash_ptr((void *)addr, bits)];
|
||||
hlist_for_each(node, head) {
|
||||
p = list_entry(node, struct kmem_debug, kd_hlist);
|
||||
if (p->kd_addr == addr) {
|
||||
hlist_del_init(&p->kd_hlist);
|
||||
list_del_init(&p->kd_list);
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return (p);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
inline void *
|
||||
spl_kmem_alloc_track(size_t size, int flags,
|
||||
const char *func, int line, int node)
|
||||
{
|
||||
void *ptr = NULL;
|
||||
kmem_debug_t *dptr;
|
||||
unsigned long irq_flags;
|
||||
|
||||
dptr = kmalloc(sizeof (kmem_debug_t), kmem_flags_convert(flags));
|
||||
if (dptr == NULL)
|
||||
return (NULL);
|
||||
|
||||
dptr->kd_func = __strdup(func, flags);
|
||||
if (dptr->kd_func == NULL) {
|
||||
kfree(dptr);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
ptr = spl_kmem_alloc_debug(size, flags, node);
|
||||
if (ptr == NULL) {
|
||||
kfree(dptr->kd_func);
|
||||
kfree(dptr);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
INIT_HLIST_NODE(&dptr->kd_hlist);
|
||||
INIT_LIST_HEAD(&dptr->kd_list);
|
||||
|
||||
dptr->kd_addr = ptr;
|
||||
dptr->kd_size = size;
|
||||
dptr->kd_line = line;
|
||||
|
||||
spin_lock_irqsave(&kmem_lock, irq_flags);
|
||||
hlist_add_head(&dptr->kd_hlist,
|
||||
&kmem_table[hash_ptr(ptr, KMEM_HASH_BITS)]);
|
||||
list_add_tail(&dptr->kd_list, &kmem_list);
|
||||
spin_unlock_irqrestore(&kmem_lock, irq_flags);
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
inline void
|
||||
spl_kmem_free_track(const void *ptr, size_t size)
|
||||
{
|
||||
kmem_debug_t *dptr;
|
||||
|
||||
/* Ignore NULL pointer since we haven't tracked it at all */
|
||||
if (ptr == NULL)
|
||||
return;
|
||||
|
||||
/* Must exist in hash due to kmem_alloc() */
|
||||
dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr);
|
||||
ASSERT3P(dptr, !=, NULL);
|
||||
ASSERT3S(dptr->kd_size, ==, size);
|
||||
|
||||
kfree(dptr->kd_func);
|
||||
kfree(dptr);
|
||||
|
||||
spl_kmem_free_debug(ptr, size);
|
||||
}
|
||||
#endif /* DEBUG_KMEM_TRACKING */
|
||||
#endif /* DEBUG_KMEM */
|
||||
|
||||
/*
|
||||
* Public kmem_alloc(), kmem_zalloc() and kmem_free() interfaces.
|
||||
*/
|
||||
void *
|
||||
spl_kmem_alloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kmem_alloc);
|
||||
|
||||
void *
|
||||
spl_kmem_zalloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
flags |= KM_ZERO;
|
||||
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kmem_zalloc);
|
||||
|
||||
/*
 * kmem_free() entry point.  Releases 'buf', of 'size' bytes, through
 * the free routine that matches the allocator selected at build time.
 */
void
spl_kmem_free(const void *buf, size_t size)
{
	/*
	 * The helpers return void, so they are called as plain statements:
	 * ISO C forbids "return expr;" in a function returning void.
	 */
#if !defined(DEBUG_KMEM)
	spl_kmem_free_impl(buf, size);
#elif !defined(DEBUG_KMEM_TRACKING)
	spl_kmem_free_debug(buf, size);
#else
	spl_kmem_free_track(buf, size);
#endif
}
EXPORT_SYMBOL(spl_kmem_free);
|
||||
|
||||
#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
|
||||
static char *
|
||||
spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
|
||||
{
|
||||
int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
|
||||
int i, flag = 1;
|
||||
|
||||
ASSERT(str != NULL && len >= 17);
|
||||
memset(str, 0, len);
|
||||
|
||||
/*
|
||||
* Check for a fully printable string, and while we are at
|
||||
* it place the printable characters in the passed buffer.
|
||||
*/
|
||||
for (i = 0; i < size; i++) {
|
||||
str[i] = ((char *)(kd->kd_addr))[i];
|
||||
if (isprint(str[i])) {
|
||||
continue;
|
||||
} else {
|
||||
/*
|
||||
* Minimum number of printable characters found
|
||||
* to make it worthwhile to print this as ascii.
|
||||
*/
|
||||
if (i > min)
|
||||
break;
|
||||
|
||||
flag = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!flag) {
|
||||
sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
|
||||
*((uint8_t *)kd->kd_addr),
|
||||
*((uint8_t *)kd->kd_addr + 2),
|
||||
*((uint8_t *)kd->kd_addr + 4),
|
||||
*((uint8_t *)kd->kd_addr + 6),
|
||||
*((uint8_t *)kd->kd_addr + 8),
|
||||
*((uint8_t *)kd->kd_addr + 10),
|
||||
*((uint8_t *)kd->kd_addr + 12),
|
||||
*((uint8_t *)kd->kd_addr + 14));
|
||||
}
|
||||
|
||||
return (str);
|
||||
}
|
||||
|
||||
static int
|
||||
spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
|
||||
{
|
||||
int i;
|
||||
|
||||
spin_lock_init(lock);
|
||||
INIT_LIST_HEAD(list);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
INIT_HLIST_HEAD(&kmem_table[i]);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
kmem_debug_t *kd;
|
||||
char str[17];
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
if (!list_empty(list))
|
||||
printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address",
|
||||
"size", "data", "func", "line");
|
||||
|
||||
list_for_each_entry(kd, list, kd_list) {
|
||||
printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", kd->kd_addr,
|
||||
(int)kd->kd_size, spl_sprintf_addr(kd, str, 17, 8),
|
||||
kd->kd_func, kd->kd_line);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
|
||||
|
||||
/*
 * Initialize the kmem layer.  In DEBUG_KMEM builds the usage counter
 * is reset to zero, and with DEBUG_KMEM_TRACKING the tracking tables
 * are set up as well.  Always returns 0.
 */
int
spl_kmem_init(void)
{
#ifdef DEBUG_KMEM
	kmem_alloc_used_set(0);

#ifdef DEBUG_KMEM_TRACKING
	spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
#endif /* DEBUG_KMEM_TRACKING */
#endif /* DEBUG_KMEM */

	return (0);
}
|
||||
|
||||
/*
 * Shut down the kmem layer.  In DEBUG_KMEM builds a warning with the
 * outstanding and peak byte counts is logged when memory is still
 * accounted as allocated.  With DEBUG_KMEM_TRACKING each unreclaimed
 * allocation is listed too.  Performance is not a concern here since
 * this runs at module unload time.
 */
void
spl_kmem_fini(void)
{
#ifdef DEBUG_KMEM
	if (kmem_alloc_used_read() != 0) {
		printk(KERN_WARNING "kmem leaked %ld/%llu bytes\n",
		    (unsigned long)kmem_alloc_used_read(), kmem_alloc_max);
	}

#ifdef DEBUG_KMEM_TRACKING
	/*
	 * Show the address, size and first few bytes of every
	 * allocation that was never freed, to aid in debugging.
	 */
	spl_kmem_fini_tracking(&kmem_list, &kmem_lock);
#endif /* DEBUG_KMEM_TRACKING */
#endif /* DEBUG_KMEM */
}
|
||||
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Kobj Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/kobj.h>
|
||||
|
||||
struct _buf *
|
||||
kobj_open_file(const char *name)
|
||||
{
|
||||
struct _buf *file;
|
||||
vnode_t *vp;
|
||||
int rc;
|
||||
|
||||
file = kmalloc(sizeof (_buf_t), kmem_flags_convert(KM_SLEEP));
|
||||
if (file == NULL)
|
||||
return ((_buf_t *)-1UL);
|
||||
|
||||
if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) {
|
||||
kfree(file);
|
||||
return ((_buf_t *)-1UL);
|
||||
}
|
||||
|
||||
file->vp = vp;
|
||||
|
||||
return (file);
|
||||
} /* kobj_open_file() */
|
||||
EXPORT_SYMBOL(kobj_open_file);
|
||||
|
||||
void
|
||||
kobj_close_file(struct _buf *file)
|
||||
{
|
||||
VOP_CLOSE(file->vp, 0, 0, 0, 0, 0);
|
||||
kfree(file);
|
||||
} /* kobj_close_file() */
|
||||
EXPORT_SYMBOL(kobj_close_file);
|
||||
|
||||
int
|
||||
kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
|
||||
{
|
||||
ssize_t resid;
|
||||
|
||||
if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off,
|
||||
UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
|
||||
return (-1);
|
||||
|
||||
return (size - resid);
|
||||
} /* kobj_read_file() */
|
||||
EXPORT_SYMBOL(kobj_read_file);
|
||||
|
||||
int
|
||||
kobj_get_filesize(struct _buf *file, uint64_t *size)
|
||||
{
|
||||
vattr_t vap;
|
||||
int rc;
|
||||
|
||||
rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL);
|
||||
if (rc)
|
||||
return (rc);
|
||||
|
||||
*size = vap.va_size;
|
||||
|
||||
return (rc);
|
||||
} /* kobj_get_filesize() */
|
||||
EXPORT_SYMBOL(kobj_get_filesize);
|
||||
@@ -0,0 +1,770 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Kstat Implementation.
|
||||
*/
|
||||
|
||||
#include <linux/seq_file.h>
|
||||
#include <sys/kstat.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/sysmacros.h>
|
||||
|
||||
static kmutex_t kstat_module_lock;
|
||||
static struct list_head kstat_module_list;
|
||||
static kid_t kstat_id;
|
||||
|
||||
static int
|
||||
kstat_resize_raw(kstat_t *ksp)
|
||||
{
|
||||
if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX)
|
||||
return (ENOMEM);
|
||||
|
||||
vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
|
||||
ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
|
||||
ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
kstat_waitq_enter(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t wcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->wlastupdate;
|
||||
kiop->wlastupdate = new;
|
||||
wcnt = kiop->wcnt++;
|
||||
if (wcnt != 0) {
|
||||
kiop->wlentime += delta * wcnt;
|
||||
kiop->wtime += delta;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(kstat_waitq_enter);
|
||||
|
||||
void
|
||||
kstat_waitq_exit(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t wcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->wlastupdate;
|
||||
kiop->wlastupdate = new;
|
||||
wcnt = kiop->wcnt--;
|
||||
ASSERT((int)wcnt > 0);
|
||||
kiop->wlentime += delta * wcnt;
|
||||
kiop->wtime += delta;
|
||||
}
|
||||
EXPORT_SYMBOL(kstat_waitq_exit);
|
||||
|
||||
void
|
||||
kstat_runq_enter(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t rcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->rlastupdate;
|
||||
kiop->rlastupdate = new;
|
||||
rcnt = kiop->rcnt++;
|
||||
if (rcnt != 0) {
|
||||
kiop->rlentime += delta * rcnt;
|
||||
kiop->rtime += delta;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(kstat_runq_enter);
|
||||
|
||||
void
|
||||
kstat_runq_exit(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t rcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->rlastupdate;
|
||||
kiop->rlastupdate = new;
|
||||
rcnt = kiop->rcnt--;
|
||||
ASSERT((int)rcnt > 0);
|
||||
kiop->rlentime += delta * rcnt;
|
||||
kiop->rtime += delta;
|
||||
}
|
||||
EXPORT_SYMBOL(kstat_runq_exit);
|
||||
|
||||
static int
|
||||
kstat_seq_show_headers(struct seq_file *f)
|
||||
{
|
||||
kstat_t *ksp = (kstat_t *)f->private;
|
||||
int rc = 0;
|
||||
|
||||
ASSERT(ksp->ks_magic == KS_MAGIC);
|
||||
|
||||
seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n",
|
||||
ksp->ks_kid, ksp->ks_type, ksp->ks_flags,
|
||||
ksp->ks_ndata, (int)ksp->ks_data_size,
|
||||
ksp->ks_crtime, ksp->ks_snaptime);
|
||||
|
||||
switch (ksp->ks_type) {
|
||||
case KSTAT_TYPE_RAW:
|
||||
restart:
|
||||
if (ksp->ks_raw_ops.headers) {
|
||||
rc = ksp->ks_raw_ops.headers(
|
||||
ksp->ks_raw_buf, ksp->ks_raw_bufsize);
|
||||
if (rc == ENOMEM && !kstat_resize_raw(ksp))
|
||||
goto restart;
|
||||
if (!rc)
|
||||
seq_puts(f, ksp->ks_raw_buf);
|
||||
} else {
|
||||
seq_printf(f, "raw data\n");
|
||||
}
|
||||
break;
|
||||
case KSTAT_TYPE_NAMED:
|
||||
seq_printf(f, "%-31s %-4s %s\n",
|
||||
"name", "type", "data");
|
||||
break;
|
||||
case KSTAT_TYPE_INTR:
|
||||
seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n",
|
||||
"hard", "soft", "watchdog",
|
||||
"spurious", "multsvc");
|
||||
break;
|
||||
case KSTAT_TYPE_IO:
|
||||
seq_printf(f,
|
||||
"%-8s %-8s %-8s %-8s %-8s %-8s "
|
||||
"%-8s %-8s %-8s %-8s %-8s %-8s\n",
|
||||
"nread", "nwritten", "reads", "writes",
|
||||
"wtime", "wlentime", "wupdate",
|
||||
"rtime", "rlentime", "rupdate",
|
||||
"wcnt", "rcnt");
|
||||
break;
|
||||
case KSTAT_TYPE_TIMER:
|
||||
seq_printf(f,
|
||||
"%-31s %-8s "
|
||||
"%-8s %-8s %-8s %-8s %-8s\n",
|
||||
"name", "events", "elapsed",
|
||||
"min", "max", "start", "stop");
|
||||
break;
|
||||
default:
|
||||
PANIC("Undefined kstat type %d\n", ksp->ks_type);
|
||||
}
|
||||
|
||||
return (-rc);
|
||||
}
|
||||
|
||||
/*
 * Hex-dump 'l' bytes starting at 'p', sixteen bytes per row.
 *
 * Each row starts with its row number in hex followed by a colon.  The
 * dump stops at the first offset >= l, so a length that is a multiple of
 * sixteen (including zero) ends with a row holding only the row label.
 * Always returns 0.
 */
static int
kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l)
{
	int row, col;

	for (row = 0; ; row++) {
		seq_printf(f, "%03x:", row);

		for (col = 0; col < 16; col++) {
			/* Ran out of data: finish the row and stop. */
			if (row * 16 + col >= l) {
				seq_printf(f, "\n");
				return (0);
			}

			seq_printf(f, " %02x",
			    (unsigned char)p[row * 16 + col]);
		}

		seq_printf(f, "\n");
	}
}
|
||||
|
||||
static int
|
||||
kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp)
|
||||
{
|
||||
seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type);
|
||||
|
||||
switch (knp->data_type) {
|
||||
case KSTAT_DATA_CHAR:
|
||||
knp->value.c[15] = '\0'; /* NULL terminate */
|
||||
seq_printf(f, "%-16s", knp->value.c);
|
||||
break;
|
||||
/*
|
||||
* NOTE - We need to be more careful able what tokens are
|
||||
* used for each arch, for now this is correct for x86_64.
|
||||
*/
|
||||
case KSTAT_DATA_INT32:
|
||||
seq_printf(f, "%d", knp->value.i32);
|
||||
break;
|
||||
case KSTAT_DATA_UINT32:
|
||||
seq_printf(f, "%u", knp->value.ui32);
|
||||
break;
|
||||
case KSTAT_DATA_INT64:
|
||||
seq_printf(f, "%lld", (signed long long)knp->value.i64);
|
||||
break;
|
||||
case KSTAT_DATA_UINT64:
|
||||
seq_printf(f, "%llu",
|
||||
(unsigned long long)knp->value.ui64);
|
||||
break;
|
||||
case KSTAT_DATA_LONG:
|
||||
seq_printf(f, "%ld", knp->value.l);
|
||||
break;
|
||||
case KSTAT_DATA_ULONG:
|
||||
seq_printf(f, "%lu", knp->value.ul);
|
||||
break;
|
||||
case KSTAT_DATA_STRING:
|
||||
KSTAT_NAMED_STR_PTR(knp)
|
||||
[KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0';
|
||||
seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp));
|
||||
break;
|
||||
default:
|
||||
PANIC("Undefined kstat data type %d\n", knp->data_type);
|
||||
}
|
||||
|
||||
seq_printf(f, "\n");
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip)
|
||||
{
|
||||
seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n",
|
||||
kip->intrs[KSTAT_INTR_HARD],
|
||||
kip->intrs[KSTAT_INTR_SOFT],
|
||||
kip->intrs[KSTAT_INTR_WATCHDOG],
|
||||
kip->intrs[KSTAT_INTR_SPURIOUS],
|
||||
kip->intrs[KSTAT_INTR_MULTSVC]);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip)
|
||||
{
|
||||
/* though wlentime & friends are signed, they will never be negative */
|
||||
seq_printf(f,
|
||||
"%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
|
||||
"%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
|
||||
kip->nread, kip->nwritten,
|
||||
kip->reads, kip->writes,
|
||||
kip->wtime, kip->wlentime, kip->wlastupdate,
|
||||
kip->rtime, kip->rlentime, kip->rlastupdate,
|
||||
kip->wcnt, kip->rcnt);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp)
|
||||
{
|
||||
seq_printf(f,
|
||||
"%-31s %-8llu %-8llu %-8llu %-8llu %-8llu %-8llu\n",
|
||||
ktp->name, ktp->num_events, ktp->elapsed_time,
|
||||
ktp->min_time, ktp->max_time,
|
||||
ktp->start_time, ktp->stop_time);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
kstat_seq_show(struct seq_file *f, void *p)
|
||||
{
|
||||
kstat_t *ksp = (kstat_t *)f->private;
|
||||
int rc = 0;
|
||||
|
||||
ASSERT(ksp->ks_magic == KS_MAGIC);
|
||||
|
||||
switch (ksp->ks_type) {
|
||||
case KSTAT_TYPE_RAW:
|
||||
restart:
|
||||
if (ksp->ks_raw_ops.data) {
|
||||
rc = ksp->ks_raw_ops.data(
|
||||
ksp->ks_raw_buf, ksp->ks_raw_bufsize, p);
|
||||
if (rc == ENOMEM && !kstat_resize_raw(ksp))
|
||||
goto restart;
|
||||
if (!rc)
|
||||
seq_puts(f, ksp->ks_raw_buf);
|
||||
} else {
|
||||
ASSERT(ksp->ks_ndata == 1);
|
||||
rc = kstat_seq_show_raw(f, ksp->ks_data,
|
||||
ksp->ks_data_size);
|
||||
}
|
||||
break;
|
||||
case KSTAT_TYPE_NAMED:
|
||||
rc = kstat_seq_show_named(f, (kstat_named_t *)p);
|
||||
break;
|
||||
case KSTAT_TYPE_INTR:
|
||||
rc = kstat_seq_show_intr(f, (kstat_intr_t *)p);
|
||||
break;
|
||||
case KSTAT_TYPE_IO:
|
||||
rc = kstat_seq_show_io(f, (kstat_io_t *)p);
|
||||
break;
|
||||
case KSTAT_TYPE_TIMER:
|
||||
rc = kstat_seq_show_timer(f, (kstat_timer_t *)p);
|
||||
break;
|
||||
default:
|
||||
PANIC("Undefined kstat type %d\n", ksp->ks_type);
|
||||
}
|
||||
|
||||
return (-rc);
|
||||
}
|
||||
|
||||
static int
|
||||
kstat_default_update(kstat_t *ksp, int rw)
|
||||
{
|
||||
ASSERT(ksp != NULL);
|
||||
|
||||
if (rw == KSTAT_WRITE)
|
||||
return (EACCES);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void *
|
||||
kstat_seq_data_addr(kstat_t *ksp, loff_t n)
|
||||
{
|
||||
void *rc = NULL;
|
||||
|
||||
switch (ksp->ks_type) {
|
||||
case KSTAT_TYPE_RAW:
|
||||
if (ksp->ks_raw_ops.addr)
|
||||
rc = ksp->ks_raw_ops.addr(ksp, n);
|
||||
else
|
||||
rc = ksp->ks_data;
|
||||
break;
|
||||
case KSTAT_TYPE_NAMED:
|
||||
rc = ksp->ks_data + n * sizeof (kstat_named_t);
|
||||
break;
|
||||
case KSTAT_TYPE_INTR:
|
||||
rc = ksp->ks_data + n * sizeof (kstat_intr_t);
|
||||
break;
|
||||
case KSTAT_TYPE_IO:
|
||||
rc = ksp->ks_data + n * sizeof (kstat_io_t);
|
||||
break;
|
||||
case KSTAT_TYPE_TIMER:
|
||||
rc = ksp->ks_data + n * sizeof (kstat_timer_t);
|
||||
break;
|
||||
default:
|
||||
PANIC("Undefined kstat type %d\n", ksp->ks_type);
|
||||
}
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
static void *
|
||||
kstat_seq_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
loff_t n = *pos;
|
||||
kstat_t *ksp = (kstat_t *)f->private;
|
||||
ASSERT(ksp->ks_magic == KS_MAGIC);
|
||||
|
||||
mutex_enter(ksp->ks_lock);
|
||||
|
||||
if (ksp->ks_type == KSTAT_TYPE_RAW) {
|
||||
ksp->ks_raw_bufsize = PAGE_SIZE;
|
||||
ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
|
||||
}
|
||||
|
||||
/* Dynamically update kstat, on error existing kstats are used */
|
||||
(void) ksp->ks_update(ksp, KSTAT_READ);
|
||||
|
||||
ksp->ks_snaptime = gethrtime();
|
||||
|
||||
if (!(ksp->ks_flags & KSTAT_FLAG_NO_HEADERS) && !n &&
|
||||
kstat_seq_show_headers(f))
|
||||
return (NULL);
|
||||
|
||||
if (n >= ksp->ks_ndata)
|
||||
return (NULL);
|
||||
|
||||
return (kstat_seq_data_addr(ksp, n));
|
||||
}
|
||||
|
||||
static void *
|
||||
kstat_seq_next(struct seq_file *f, void *p, loff_t *pos)
|
||||
{
|
||||
kstat_t *ksp = (kstat_t *)f->private;
|
||||
ASSERT(ksp->ks_magic == KS_MAGIC);
|
||||
|
||||
++*pos;
|
||||
if (*pos >= ksp->ks_ndata)
|
||||
return (NULL);
|
||||
|
||||
return (kstat_seq_data_addr(ksp, *pos));
|
||||
}
|
||||
|
||||
static void
|
||||
kstat_seq_stop(struct seq_file *f, void *v)
|
||||
{
|
||||
kstat_t *ksp = (kstat_t *)f->private;
|
||||
ASSERT(ksp->ks_magic == KS_MAGIC);
|
||||
|
||||
if (ksp->ks_type == KSTAT_TYPE_RAW)
|
||||
vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
|
||||
|
||||
mutex_exit(ksp->ks_lock);
|
||||
}
|
||||
|
||||
static struct seq_operations kstat_seq_ops = {
|
||||
.show = kstat_seq_show,
|
||||
.start = kstat_seq_start,
|
||||
.next = kstat_seq_next,
|
||||
.stop = kstat_seq_stop,
|
||||
};
|
||||
|
||||
static kstat_module_t *
|
||||
kstat_find_module(char *name)
|
||||
{
|
||||
kstat_module_t *module;
|
||||
|
||||
list_for_each_entry(module, &kstat_module_list, ksm_module_list) {
|
||||
if (strncmp(name, module->ksm_name, KSTAT_STRLEN) == 0)
|
||||
return (module);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static kstat_module_t *
|
||||
kstat_create_module(char *name)
|
||||
{
|
||||
kstat_module_t *module;
|
||||
struct proc_dir_entry *pde;
|
||||
|
||||
pde = proc_mkdir(name, proc_spl_kstat);
|
||||
if (pde == NULL)
|
||||
return (NULL);
|
||||
|
||||
module = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
|
||||
module->ksm_proc = pde;
|
||||
strlcpy(module->ksm_name, name, KSTAT_STRLEN+1);
|
||||
INIT_LIST_HEAD(&module->ksm_kstat_list);
|
||||
list_add_tail(&module->ksm_module_list, &kstat_module_list);
|
||||
|
||||
return (module);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
kstat_delete_module(kstat_module_t *module)
|
||||
{
|
||||
ASSERT(list_empty(&module->ksm_kstat_list));
|
||||
remove_proc_entry(module->ksm_name, proc_spl_kstat);
|
||||
list_del(&module->ksm_module_list);
|
||||
kmem_free(module, sizeof (kstat_module_t));
|
||||
}
|
||||
|
||||
static int
|
||||
proc_kstat_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct seq_file *f;
|
||||
int rc;
|
||||
|
||||
rc = seq_open(filp, &kstat_seq_ops);
|
||||
if (rc)
|
||||
return (rc);
|
||||
|
||||
f = filp->private_data;
|
||||
f->private = PDE_DATA(inode);
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
proc_kstat_write(struct file *filp, const char __user *buf, size_t len,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct seq_file *f = filp->private_data;
|
||||
kstat_t *ksp = f->private;
|
||||
int rc;
|
||||
|
||||
ASSERT(ksp->ks_magic == KS_MAGIC);
|
||||
|
||||
mutex_enter(ksp->ks_lock);
|
||||
rc = ksp->ks_update(ksp, KSTAT_WRITE);
|
||||
mutex_exit(ksp->ks_lock);
|
||||
|
||||
if (rc)
|
||||
return (-rc);
|
||||
|
||||
*ppos += len;
|
||||
return (len);
|
||||
}
|
||||
|
||||
static struct file_operations proc_kstat_operations = {
|
||||
.open = proc_kstat_open,
|
||||
.write = proc_kstat_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
void
|
||||
__kstat_set_raw_ops(kstat_t *ksp,
|
||||
int (*headers)(char *buf, size_t size),
|
||||
int (*data)(char *buf, size_t size, void *data),
|
||||
void *(*addr)(kstat_t *ksp, loff_t index))
|
||||
{
|
||||
ksp->ks_raw_ops.headers = headers;
|
||||
ksp->ks_raw_ops.data = data;
|
||||
ksp->ks_raw_ops.addr = addr;
|
||||
}
|
||||
EXPORT_SYMBOL(__kstat_set_raw_ops);
|
||||
|
||||
void
|
||||
kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
|
||||
const char *name)
|
||||
{
|
||||
kpep->kpe_owner = NULL;
|
||||
kpep->kpe_proc = NULL;
|
||||
INIT_LIST_HEAD(&kpep->kpe_list);
|
||||
strncpy(kpep->kpe_module, module, KSTAT_STRLEN);
|
||||
strncpy(kpep->kpe_name, name, KSTAT_STRLEN);
|
||||
}
|
||||
EXPORT_SYMBOL(kstat_proc_entry_init);
|
||||
|
||||
kstat_t *
|
||||
__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
|
||||
const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
|
||||
uchar_t ks_flags)
|
||||
{
|
||||
kstat_t *ksp;
|
||||
|
||||
ASSERT(ks_module);
|
||||
ASSERT(ks_instance == 0);
|
||||
ASSERT(ks_name);
|
||||
|
||||
if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
|
||||
ASSERT(ks_ndata == 1);
|
||||
|
||||
ksp = kmem_zalloc(sizeof (*ksp), KM_SLEEP);
|
||||
if (ksp == NULL)
|
||||
return (ksp);
|
||||
|
||||
mutex_enter(&kstat_module_lock);
|
||||
ksp->ks_kid = kstat_id;
|
||||
kstat_id++;
|
||||
mutex_exit(&kstat_module_lock);
|
||||
|
||||
ksp->ks_magic = KS_MAGIC;
|
||||
mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
ksp->ks_lock = &ksp->ks_private_lock;
|
||||
|
||||
ksp->ks_crtime = gethrtime();
|
||||
ksp->ks_snaptime = ksp->ks_crtime;
|
||||
ksp->ks_instance = ks_instance;
|
||||
strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
|
||||
ksp->ks_type = ks_type;
|
||||
ksp->ks_flags = ks_flags;
|
||||
ksp->ks_update = kstat_default_update;
|
||||
ksp->ks_private = NULL;
|
||||
ksp->ks_raw_ops.headers = NULL;
|
||||
ksp->ks_raw_ops.data = NULL;
|
||||
ksp->ks_raw_ops.addr = NULL;
|
||||
ksp->ks_raw_buf = NULL;
|
||||
ksp->ks_raw_bufsize = 0;
|
||||
kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name);
|
||||
|
||||
switch (ksp->ks_type) {
|
||||
case KSTAT_TYPE_RAW:
|
||||
ksp->ks_ndata = 1;
|
||||
ksp->ks_data_size = ks_ndata;
|
||||
break;
|
||||
case KSTAT_TYPE_NAMED:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
|
||||
break;
|
||||
case KSTAT_TYPE_INTR:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
|
||||
break;
|
||||
case KSTAT_TYPE_IO:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
|
||||
break;
|
||||
case KSTAT_TYPE_TIMER:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
|
||||
break;
|
||||
default:
|
||||
PANIC("Undefined kstat type %d\n", ksp->ks_type);
|
||||
}
|
||||
|
||||
if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
|
||||
ksp->ks_data = NULL;
|
||||
} else {
|
||||
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
|
||||
if (ksp->ks_data == NULL) {
|
||||
kmem_free(ksp, sizeof (*ksp));
|
||||
ksp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return (ksp);
|
||||
}
|
||||
EXPORT_SYMBOL(__kstat_create);
|
||||
|
||||
static int
|
||||
kstat_detect_collision(kstat_proc_entry_t *kpep)
|
||||
{
|
||||
kstat_module_t *module;
|
||||
kstat_proc_entry_t *tmp;
|
||||
char *parent;
|
||||
char *cp;
|
||||
|
||||
parent = kmem_asprintf("%s", kpep->kpe_module);
|
||||
|
||||
if ((cp = strrchr(parent, '/')) == NULL) {
|
||||
strfree(parent);
|
||||
return (0);
|
||||
}
|
||||
|
||||
cp[0] = '\0';
|
||||
if ((module = kstat_find_module(parent)) != NULL) {
|
||||
list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
|
||||
if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) {
|
||||
strfree(parent);
|
||||
return (EEXIST);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
strfree(parent);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a file to the proc filesystem under the kstat namespace (i.e.
|
||||
* /proc/spl/kstat/). The file need not necessarily be implemented as a
|
||||
* kstat.
|
||||
*/
|
||||
void
|
||||
kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode,
|
||||
const struct file_operations *file_ops, void *data)
|
||||
{
|
||||
kstat_module_t *module;
|
||||
kstat_proc_entry_t *tmp;
|
||||
|
||||
ASSERT(kpep);
|
||||
|
||||
mutex_enter(&kstat_module_lock);
|
||||
|
||||
module = kstat_find_module(kpep->kpe_module);
|
||||
if (module == NULL) {
|
||||
if (kstat_detect_collision(kpep) != 0) {
|
||||
cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
|
||||
" collision", kpep->kpe_module, kpep->kpe_name);
|
||||
goto out;
|
||||
}
|
||||
module = kstat_create_module(kpep->kpe_module);
|
||||
if (module == NULL)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only one entry by this name per-module, on failure the module
|
||||
* shouldn't be deleted because we know it has at least one entry.
|
||||
*/
|
||||
list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
|
||||
if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
|
||||
|
||||
kpep->kpe_owner = module;
|
||||
kpep->kpe_proc = proc_create_data(kpep->kpe_name, mode,
|
||||
module->ksm_proc, file_ops, data);
|
||||
if (kpep->kpe_proc == NULL) {
|
||||
list_del_init(&kpep->kpe_list);
|
||||
if (list_empty(&module->ksm_kstat_list))
|
||||
kstat_delete_module(module);
|
||||
}
|
||||
out:
|
||||
mutex_exit(&kstat_module_lock);
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL(kstat_proc_entry_install);
|
||||
|
||||
void
|
||||
__kstat_install(kstat_t *ksp)
|
||||
{
|
||||
ASSERT(ksp);
|
||||
mode_t mode;
|
||||
/* Specify permission modes for different kstats */
|
||||
if (strncmp(ksp->ks_proc.kpe_name, "dbufs", KSTAT_STRLEN) == 0) {
|
||||
mode = 0600;
|
||||
} else {
|
||||
mode = 0644;
|
||||
}
|
||||
kstat_proc_entry_install(
|
||||
&ksp->ks_proc, mode, &proc_kstat_operations, ksp);
|
||||
}
|
||||
EXPORT_SYMBOL(__kstat_install);
|
||||
|
||||
void
|
||||
kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
|
||||
{
|
||||
kstat_module_t *module = kpep->kpe_owner;
|
||||
if (kpep->kpe_proc)
|
||||
remove_proc_entry(kpep->kpe_name, module->ksm_proc);
|
||||
|
||||
mutex_enter(&kstat_module_lock);
|
||||
list_del_init(&kpep->kpe_list);
|
||||
|
||||
/*
|
||||
* Remove top level module directory if it wasn't empty before, but now
|
||||
* is.
|
||||
*/
|
||||
if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list))
|
||||
kstat_delete_module(module);
|
||||
mutex_exit(&kstat_module_lock);
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL(kstat_proc_entry_delete);
|
||||
|
||||
void
|
||||
__kstat_delete(kstat_t *ksp)
|
||||
{
|
||||
kstat_proc_entry_delete(&ksp->ks_proc);
|
||||
|
||||
if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
|
||||
kmem_free(ksp->ks_data, ksp->ks_data_size);
|
||||
|
||||
ksp->ks_lock = NULL;
|
||||
mutex_destroy(&ksp->ks_private_lock);
|
||||
kmem_free(ksp, sizeof (*ksp));
|
||||
}
|
||||
EXPORT_SYMBOL(__kstat_delete);
|
||||
|
||||
int
|
||||
spl_kstat_init(void)
|
||||
{
|
||||
mutex_init(&kstat_module_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
INIT_LIST_HEAD(&kstat_module_list);
|
||||
kstat_id = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
spl_kstat_fini(void)
|
||||
{
|
||||
ASSERT(list_empty(&kstat_module_list));
|
||||
mutex_destroy(&kstat_module_lock);
|
||||
}
|
||||
@@ -0,0 +1,782 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Proc Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/systeminfo.h>
|
||||
#include <sys/kstat.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/proc.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
|
||||
typedef struct ctl_table __no_const spl_ctl_table;
|
||||
#else
|
||||
typedef struct ctl_table spl_ctl_table;
|
||||
#endif
|
||||
|
||||
static unsigned long table_min = 0;
|
||||
static unsigned long table_max = ~0;
|
||||
|
||||
static struct ctl_table_header *spl_header = NULL;
|
||||
static struct proc_dir_entry *proc_spl = NULL;
|
||||
static struct proc_dir_entry *proc_spl_kmem = NULL;
|
||||
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
|
||||
static struct proc_dir_entry *proc_spl_taskq_all = NULL;
|
||||
static struct proc_dir_entry *proc_spl_taskq = NULL;
|
||||
struct proc_dir_entry *proc_spl_kstat = NULL;
|
||||
|
||||
static int
|
||||
proc_copyin_string(char *kbuffer, int kbuffer_size, const char *ubuffer,
|
||||
int ubuffer_size)
|
||||
{
|
||||
int size;
|
||||
|
||||
if (ubuffer_size > kbuffer_size)
|
||||
return (-EOVERFLOW);
|
||||
|
||||
if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size))
|
||||
return (-EFAULT);
|
||||
|
||||
/* strip trailing whitespace */
|
||||
size = strnlen(kbuffer, ubuffer_size);
|
||||
while (size-- >= 0)
|
||||
if (!isspace(kbuffer[size]))
|
||||
break;
|
||||
|
||||
/* empty string */
|
||||
if (size < 0)
|
||||
return (-EINVAL);
|
||||
|
||||
/* no space to terminate */
|
||||
if (size == kbuffer_size)
|
||||
return (-EOVERFLOW);
|
||||
|
||||
kbuffer[size + 1] = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
proc_copyout_string(char *ubuffer, int ubuffer_size, const char *kbuffer,
|
||||
char *append)
|
||||
{
|
||||
/*
|
||||
* NB if 'append' != NULL, it's a single character to append to the
|
||||
* copied out string - usually "\n", for /proc entries and
|
||||
* (i.e. a terminating zero byte) for sysctl entries
|
||||
*/
|
||||
int size = MIN(strlen(kbuffer), ubuffer_size);
|
||||
|
||||
if (copy_to_user(ubuffer, kbuffer, size))
|
||||
return (-EFAULT);
|
||||
|
||||
if (append != NULL && size < ubuffer_size) {
|
||||
if (copy_to_user(ubuffer + size, append, 1))
|
||||
return (-EFAULT);
|
||||
|
||||
size++;
|
||||
}
|
||||
|
||||
return (size);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_KMEM
/*
 * sysctl handler reporting an atomic memory-usage counter.
 *
 * table->data points at the counter (atomic64_t when HAVE_ATOMIC64_T is
 * defined, atomic_t otherwise).  Reads sample it into a local and format
 * it through proc_doulongvec_minmax() using a private copy of the table.
 * Writes are accepted but ignored: the file position is advanced and 0 is
 * returned.
 */
static int
proc_domemused(struct ctl_table *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned long min = 0, max = ~0, val;
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &min;
	dummy.extra2 = &max;

	if (write) {
		*ppos += *lenp;
		return (0);
	}

#ifdef HAVE_ATOMIC64_T
	val = atomic64_read((atomic64_t *)table->data);
#else
	val = atomic_read((atomic_t *)table->data);
#endif /* HAVE_ATOMIC64_T */

	return (proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos));
}
#endif /* DEBUG_KMEM */
|
||||
|
||||
static int
|
||||
proc_doslab(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int rc = 0;
|
||||
unsigned long min = 0, max = ~0, val = 0, mask;
|
||||
spl_ctl_table dummy = *table;
|
||||
spl_kmem_cache_t *skc;
|
||||
|
||||
dummy.data = &val;
|
||||
dummy.proc_handler = &proc_dointvec;
|
||||
dummy.extra1 = &min;
|
||||
dummy.extra2 = &max;
|
||||
|
||||
if (write) {
|
||||
*ppos += *lenp;
|
||||
} else {
|
||||
down_read(&spl_kmem_cache_sem);
|
||||
mask = (unsigned long)table->data;
|
||||
|
||||
list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
|
||||
|
||||
/* Only use slabs of the correct kmem/vmem type */
|
||||
if (!(skc->skc_flags & mask))
|
||||
continue;
|
||||
|
||||
/* Sum the specified field for selected slabs */
|
||||
switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
|
||||
case KMC_TOTAL:
|
||||
val += skc->skc_slab_size * skc->skc_slab_total;
|
||||
break;
|
||||
case KMC_ALLOC:
|
||||
val += skc->skc_obj_size * skc->skc_obj_alloc;
|
||||
break;
|
||||
case KMC_MAX:
|
||||
val += skc->skc_obj_size * skc->skc_obj_max;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
up_read(&spl_kmem_cache_sem);
|
||||
rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
|
||||
}
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
static int
|
||||
proc_dohostid(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int len, rc = 0;
|
||||
char *end, str[32];
|
||||
|
||||
if (write) {
|
||||
/*
|
||||
* We can't use proc_doulongvec_minmax() in the write
|
||||
* case here because hostid while a hex value has no
|
||||
* leading 0x which confuses the helper function.
|
||||
*/
|
||||
rc = proc_copyin_string(str, sizeof (str), buffer, *lenp);
|
||||
if (rc < 0)
|
||||
return (rc);
|
||||
|
||||
spl_hostid = simple_strtoul(str, &end, 16);
|
||||
if (str == end)
|
||||
return (-EINVAL);
|
||||
|
||||
} else {
|
||||
len = snprintf(str, sizeof (str), "%lx",
|
||||
(unsigned long) zone_get_hostid(NULL));
|
||||
if (*ppos >= len)
|
||||
rc = 0;
|
||||
else
|
||||
rc = proc_copyout_string(buffer,
|
||||
*lenp, str + *ppos, "\n");
|
||||
|
||||
if (rc >= 0) {
|
||||
*lenp = rc;
|
||||
*ppos += rc;
|
||||
}
|
||||
}
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
/* Print the column titles of the taskq proc files. */
static void
taskq_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
	    "taskq", "act", "nthr", "spwn", "maxt",
	    "pri", "mina", "maxa", "cura", "flags");
}
|
||||
|
||||
/* indices into the lheads array below */
|
||||
#define LHEAD_PEND 0
|
||||
#define LHEAD_PRIO 1
|
||||
#define LHEAD_DELAY 2
|
||||
#define LHEAD_WAIT 3
|
||||
#define LHEAD_ACTIVE 4
|
||||
#define LHEAD_SIZE 5
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
static unsigned int spl_max_show_tasks = 512;
|
||||
module_param(spl_max_show_tasks, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
|
||||
/* END CSTYLED */
|
||||
|
||||
static int
|
||||
taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
|
||||
{
|
||||
taskq_t *tq = p;
|
||||
taskq_thread_t *tqt;
|
||||
spl_wait_queue_entry_t *wq;
|
||||
struct task_struct *tsk;
|
||||
taskq_ent_t *tqe;
|
||||
char name[100];
|
||||
struct list_head *lheads[LHEAD_SIZE], *lh;
|
||||
static char *list_names[LHEAD_SIZE] =
|
||||
{"pend", "prio", "delay", "wait", "active" };
|
||||
int i, j, have_lheads = 0;
|
||||
unsigned long wflags, flags;
|
||||
|
||||
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
||||
spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
|
||||
|
||||
/* get the various lists and check whether they're empty */
|
||||
lheads[LHEAD_PEND] = &tq->tq_pend_list;
|
||||
lheads[LHEAD_PRIO] = &tq->tq_prio_list;
|
||||
lheads[LHEAD_DELAY] = &tq->tq_delay_list;
|
||||
#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
|
||||
lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
|
||||
#else
|
||||
lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
|
||||
#endif
|
||||
lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
|
||||
|
||||
for (i = 0; i < LHEAD_SIZE; ++i) {
|
||||
if (list_empty(lheads[i]))
|
||||
lheads[i] = NULL;
|
||||
else
|
||||
++have_lheads;
|
||||
}
|
||||
|
||||
/* early return in non-"all" mode if lists are all empty */
|
||||
if (!allflag && !have_lheads) {
|
||||
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* unlock the waitq quickly */
|
||||
if (!lheads[LHEAD_WAIT])
|
||||
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
|
||||
|
||||
/* show the base taskq contents */
|
||||
snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
|
||||
seq_printf(f, "%-25s ", name);
|
||||
seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
|
||||
tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
|
||||
tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
|
||||
tq->tq_nalloc, tq->tq_flags);
|
||||
|
||||
/* show the active list */
|
||||
if (lheads[LHEAD_ACTIVE]) {
|
||||
j = 0;
|
||||
list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
|
||||
if (j == 0)
|
||||
seq_printf(f, "\t%s:",
|
||||
list_names[LHEAD_ACTIVE]);
|
||||
else if (j == 2) {
|
||||
seq_printf(f, "\n\t ");
|
||||
j = 0;
|
||||
}
|
||||
seq_printf(f, " [%d]%pf(%ps)",
|
||||
tqt->tqt_thread->pid,
|
||||
tqt->tqt_task->tqent_func,
|
||||
tqt->tqt_task->tqent_arg);
|
||||
++j;
|
||||
}
|
||||
seq_printf(f, "\n");
|
||||
}
|
||||
|
||||
for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
|
||||
if (lheads[i]) {
|
||||
j = 0;
|
||||
list_for_each(lh, lheads[i]) {
|
||||
if (spl_max_show_tasks != 0 &&
|
||||
j >= spl_max_show_tasks) {
|
||||
seq_printf(f, "\n\t(truncated)");
|
||||
break;
|
||||
}
|
||||
/* show the wait waitq list */
|
||||
if (i == LHEAD_WAIT) {
|
||||
#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
|
||||
wq = list_entry(lh,
|
||||
spl_wait_queue_entry_t, entry);
|
||||
#else
|
||||
wq = list_entry(lh,
|
||||
spl_wait_queue_entry_t, task_list);
|
||||
#endif
|
||||
if (j == 0)
|
||||
seq_printf(f, "\t%s:",
|
||||
list_names[i]);
|
||||
else if (j % 8 == 0)
|
||||
seq_printf(f, "\n\t ");
|
||||
|
||||
tsk = wq->private;
|
||||
seq_printf(f, " %d", tsk->pid);
|
||||
/* pend, prio and delay lists */
|
||||
} else {
|
||||
tqe = list_entry(lh, taskq_ent_t,
|
||||
tqent_list);
|
||||
if (j == 0)
|
||||
seq_printf(f, "\t%s:",
|
||||
list_names[i]);
|
||||
else if (j % 2 == 0)
|
||||
seq_printf(f, "\n\t ");
|
||||
|
||||
seq_printf(f, " %pf(%ps)",
|
||||
tqe->tqent_func,
|
||||
tqe->tqent_arg);
|
||||
}
|
||||
++j;
|
||||
}
|
||||
seq_printf(f, "\n");
|
||||
}
|
||||
if (lheads[LHEAD_WAIT])
|
||||
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
taskq_all_seq_show(struct seq_file *f, void *p)
|
||||
{
|
||||
return (taskq_seq_show_impl(f, p, B_TRUE));
|
||||
}
|
||||
|
||||
static int
|
||||
taskq_seq_show(struct seq_file *f, void *p)
|
||||
{
|
||||
return (taskq_seq_show_impl(f, p, B_FALSE));
|
||||
}
|
||||
|
||||
static void *
|
||||
taskq_seq_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
struct list_head *p;
|
||||
loff_t n = *pos;
|
||||
|
||||
down_read(&tq_list_sem);
|
||||
if (!n)
|
||||
taskq_seq_show_headers(f);
|
||||
|
||||
p = tq_list.next;
|
||||
while (n--) {
|
||||
p = p->next;
|
||||
if (p == &tq_list)
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
return (list_entry(p, taskq_t, tq_taskqs));
|
||||
}
|
||||
|
||||
static void *
|
||||
taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
|
||||
{
|
||||
taskq_t *tq = p;
|
||||
|
||||
++*pos;
|
||||
return ((tq->tq_taskqs.next == &tq_list) ?
|
||||
NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs));
|
||||
}
|
||||
|
||||
/*
 * Print the two title lines of the slab proc file: a banner naming the
 * column groups (cache, slab, object, emergency) and the column names.
 *
 * NOTE(review): the padding inside these literals is much narrower than
 * the field widths used by slab_seq_show(); it may have been collapsed in
 * this copy of the file - verify against the upstream source.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	/* column group banner */
	seq_printf(f,
	    "--------------------- cache ----------"
	    "--------------------------------------------- "
	    "----- slab ------ "
	    "---- object ----- "
	    "--- emergency ---\n");

	/* individual column names */
	seq_printf(f,
	    "name "
	    " flags size alloc slabsize objsize "
	    "total alloc max "
	    "total alloc max "
	    "dlock alloc max\n");
}
|
||||
|
||||
static int
|
||||
slab_seq_show(struct seq_file *f, void *p)
|
||||
{
|
||||
spl_kmem_cache_t *skc = p;
|
||||
|
||||
ASSERT(skc->skc_magic == SKC_MAGIC);
|
||||
|
||||
/*
|
||||
* Backed by Linux slab see /proc/slabinfo.
|
||||
*/
|
||||
if (skc->skc_flags & KMC_SLAB)
|
||||
return (0);
|
||||
|
||||
spin_lock(&skc->skc_lock);
|
||||
seq_printf(f, "%-36s ", skc->skc_name);
|
||||
seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
|
||||
"%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
|
||||
(long unsigned)skc->skc_flags,
|
||||
(long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
|
||||
(long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
|
||||
(unsigned)skc->skc_slab_size,
|
||||
(unsigned)skc->skc_obj_size,
|
||||
(long unsigned)skc->skc_slab_total,
|
||||
(long unsigned)skc->skc_slab_alloc,
|
||||
(long unsigned)skc->skc_slab_max,
|
||||
(long unsigned)skc->skc_obj_total,
|
||||
(long unsigned)skc->skc_obj_alloc,
|
||||
(long unsigned)skc->skc_obj_max,
|
||||
(long unsigned)skc->skc_obj_deadlock,
|
||||
(long unsigned)skc->skc_obj_emergency,
|
||||
(long unsigned)skc->skc_obj_emergency_max);
|
||||
|
||||
spin_unlock(&skc->skc_lock);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void *
|
||||
slab_seq_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
struct list_head *p;
|
||||
loff_t n = *pos;
|
||||
|
||||
down_read(&spl_kmem_cache_sem);
|
||||
if (!n)
|
||||
slab_seq_show_headers(f);
|
||||
|
||||
p = spl_kmem_cache_list.next;
|
||||
while (n--) {
|
||||
p = p->next;
|
||||
if (p == &spl_kmem_cache_list)
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
return (list_entry(p, spl_kmem_cache_t, skc_list));
|
||||
}
|
||||
|
||||
static void *
|
||||
slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
|
||||
{
|
||||
spl_kmem_cache_t *skc = p;
|
||||
|
||||
++*pos;
|
||||
return ((skc->skc_list.next == &spl_kmem_cache_list) ?
|
||||
NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
|
||||
}
|
||||
|
||||
static void
|
||||
slab_seq_stop(struct seq_file *f, void *v)
|
||||
{
|
||||
up_read(&spl_kmem_cache_sem);
|
||||
}
|
||||
|
||||
static struct seq_operations slab_seq_ops = {
|
||||
.show = slab_seq_show,
|
||||
.start = slab_seq_start,
|
||||
.next = slab_seq_next,
|
||||
.stop = slab_seq_stop,
|
||||
};
|
||||
|
||||
static int
|
||||
proc_slab_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return (seq_open(filp, &slab_seq_ops));
|
||||
}
|
||||
|
||||
static struct file_operations proc_slab_operations = {
|
||||
.open = proc_slab_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static void
|
||||
taskq_seq_stop(struct seq_file *f, void *v)
|
||||
{
|
||||
up_read(&tq_list_sem);
|
||||
}
|
||||
|
||||
static struct seq_operations taskq_all_seq_ops = {
|
||||
.show = taskq_all_seq_show,
|
||||
.start = taskq_seq_start,
|
||||
.next = taskq_seq_next,
|
||||
.stop = taskq_seq_stop,
|
||||
};
|
||||
|
||||
static struct seq_operations taskq_seq_ops = {
|
||||
.show = taskq_seq_show,
|
||||
.start = taskq_seq_start,
|
||||
.next = taskq_seq_next,
|
||||
.stop = taskq_seq_stop,
|
||||
};
|
||||
|
||||
static int
|
||||
proc_taskq_all_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return (seq_open(filp, &taskq_all_seq_ops));
|
||||
}
|
||||
|
||||
static int
|
||||
proc_taskq_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return (seq_open(filp, &taskq_seq_ops));
|
||||
}
|
||||
|
||||
static struct file_operations proc_taskq_all_operations = {
|
||||
.open = proc_taskq_all_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static struct file_operations proc_taskq_operations = {
|
||||
.open = proc_taskq_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static struct ctl_table spl_kmem_table[] = {
|
||||
#ifdef DEBUG_KMEM
|
||||
{
|
||||
.procname = "kmem_used",
|
||||
.data = &kmem_alloc_used,
|
||||
#ifdef HAVE_ATOMIC64_T
|
||||
.maxlen = sizeof (atomic64_t),
|
||||
#else
|
||||
.maxlen = sizeof (atomic_t),
|
||||
#endif /* HAVE_ATOMIC64_T */
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_domemused,
|
||||
},
|
||||
{
|
||||
.procname = "kmem_max",
|
||||
.data = &kmem_alloc_max,
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
},
|
||||
#endif /* DEBUG_KMEM */
|
||||
{
|
||||
.procname = "slab_kmem_total",
|
||||
.data = (void *)(KMC_KMEM | KMC_TOTAL),
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doslab,
|
||||
},
|
||||
{
|
||||
.procname = "slab_kmem_alloc",
|
||||
.data = (void *)(KMC_KMEM | KMC_ALLOC),
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doslab,
|
||||
},
|
||||
{
|
||||
.procname = "slab_kmem_max",
|
||||
.data = (void *)(KMC_KMEM | KMC_MAX),
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doslab,
|
||||
},
|
||||
{
|
||||
.procname = "slab_vmem_total",
|
||||
.data = (void *)(KMC_VMEM | KMC_TOTAL),
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doslab,
|
||||
},
|
||||
{
|
||||
.procname = "slab_vmem_alloc",
|
||||
.data = (void *)(KMC_VMEM | KMC_ALLOC),
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doslab,
|
||||
},
|
||||
{
|
||||
.procname = "slab_vmem_max",
|
||||
.data = (void *)(KMC_VMEM | KMC_MAX),
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doslab,
|
||||
},
|
||||
{},
|
||||
};
|
||||
|
||||
/*
 * Child table of the "kstat" entry in spl_table; it holds only the
 * terminating empty entry.
 */
static struct ctl_table spl_kstat_table[] = {
	{},
};
|
||||
|
||||
static struct ctl_table spl_table[] = {
|
||||
/*
|
||||
* NB No .strategy entries have been provided since
|
||||
* sysctl(8) prefers to go via /proc for portability.
|
||||
*/
|
||||
{
|
||||
.procname = "gitrev",
|
||||
.data = spl_gitrev,
|
||||
.maxlen = sizeof (spl_gitrev),
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_dostring,
|
||||
},
|
||||
{
|
||||
.procname = "hostid",
|
||||
.data = &spl_hostid,
|
||||
.maxlen = sizeof (unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dohostid,
|
||||
},
|
||||
{
|
||||
.procname = "kmem",
|
||||
.mode = 0555,
|
||||
.child = spl_kmem_table,
|
||||
},
|
||||
{
|
||||
.procname = "kstat",
|
||||
.mode = 0555,
|
||||
.child = spl_kstat_table,
|
||||
},
|
||||
{},
|
||||
};
|
||||
|
||||
static struct ctl_table spl_dir[] = {
|
||||
{
|
||||
.procname = "spl",
|
||||
.mode = 0555,
|
||||
.child = spl_table,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table spl_root[] = {
|
||||
{
|
||||
#ifdef HAVE_CTL_NAME
|
||||
.ctl_name = CTL_KERN,
|
||||
#endif
|
||||
.procname = "kernel",
|
||||
.mode = 0555,
|
||||
.child = spl_dir,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
int
|
||||
spl_proc_init(void)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
spl_header = register_sysctl_table(spl_root);
|
||||
if (spl_header == NULL)
|
||||
return (-EUNATCH);
|
||||
|
||||
proc_spl = proc_mkdir("spl", NULL);
|
||||
if (proc_spl == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
|
||||
&proc_taskq_all_operations, NULL);
|
||||
if (proc_spl_taskq_all == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
|
||||
&proc_taskq_operations, NULL);
|
||||
if (proc_spl_taskq == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_kmem = proc_mkdir("kmem", proc_spl);
|
||||
if (proc_spl_kmem == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
|
||||
&proc_slab_operations, NULL);
|
||||
if (proc_spl_kmem_slab == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_kstat = proc_mkdir("kstat", proc_spl);
|
||||
if (proc_spl_kstat == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
if (rc) {
|
||||
remove_proc_entry("kstat", proc_spl);
|
||||
remove_proc_entry("slab", proc_spl_kmem);
|
||||
remove_proc_entry("kmem", proc_spl);
|
||||
remove_proc_entry("taskq-all", proc_spl);
|
||||
remove_proc_entry("taskq", proc_spl);
|
||||
remove_proc_entry("spl", NULL);
|
||||
unregister_sysctl_table(spl_header);
|
||||
}
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
void
|
||||
spl_proc_fini(void)
|
||||
{
|
||||
remove_proc_entry("kstat", proc_spl);
|
||||
remove_proc_entry("slab", proc_spl_kmem);
|
||||
remove_proc_entry("kmem", proc_spl);
|
||||
remove_proc_entry("taskq-all", proc_spl);
|
||||
remove_proc_entry("taskq", proc_spl);
|
||||
remove_proc_entry("spl", NULL);
|
||||
|
||||
ASSERT(spl_header != NULL);
|
||||
unregister_sysctl_table(spl_header);
|
||||
}
|
||||
@@ -0,0 +1,257 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2018 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/list.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/procfs_list.h>
|
||||
#include <linux/proc_fs.h>
|
||||
|
||||
/*
|
||||
* A procfs_list is a wrapper around a linked list which implements the seq_file
|
||||
* interface, allowing the contents of the list to be exposed through procfs.
|
||||
* The kernel already has some utilities to help implement the seq_file
|
||||
* interface for linked lists (seq_list_*), but they aren't appropriate for use
|
||||
* with lists that have many entries, because seq_list_start walks the list at
|
||||
* the start of each read syscall to find where it left off, so reading a file
|
||||
* ends up being quadratic in the number of entries in the list.
|
||||
*
|
||||
* This implementation avoids this penalty by maintaining a separate cursor into
|
||||
* the list per instance of the file that is open. It also maintains some extra
|
||||
* information in each node of the list to prevent reads of entries that have
|
||||
* been dropped from the list.
|
||||
*
|
||||
* Callers should only add elements to the list using procfs_list_add, which
|
||||
* adds an element to the tail of the list. Other operations can be performed
|
||||
* directly on the wrapped list using the normal list manipulation functions,
|
||||
* but elements should only be removed from the head of the list.
|
||||
*/
|
||||
|
||||
/*
 * Yield (as an lvalue) the pln_id of the procfs_list_node_t embedded in
 * 'obj' at byte offset (procfs_list)->pl_node_offset.  Both arguments
 * are fully parenthesised so that an expression such as 'base + 1' may
 * be passed as 'obj' without the cast binding to only part of it.
 */
#define	NODE_ID(procfs_list, obj) \
	(((procfs_list_node_t *)(((char *)(obj)) + \
	(procfs_list)->pl_node_offset))->pln_id)
|
||||
|
||||
typedef struct procfs_list_cursor {
|
||||
procfs_list_t *procfs_list; /* List into which this cursor points */
|
||||
void *cached_node; /* Most recently accessed node */
|
||||
loff_t cached_pos; /* Position of cached_node */
|
||||
} procfs_list_cursor_t;
|
||||
|
||||
static int
|
||||
procfs_list_seq_show(struct seq_file *f, void *p)
|
||||
{
|
||||
procfs_list_cursor_t *cursor = f->private;
|
||||
procfs_list_t *procfs_list = cursor->procfs_list;
|
||||
|
||||
ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
|
||||
if (p == SEQ_START_TOKEN) {
|
||||
if (procfs_list->pl_show_header != NULL)
|
||||
return (procfs_list->pl_show_header(f));
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
return (procfs_list->pl_show(f, p));
|
||||
}
|
||||
|
||||
static void *
|
||||
procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos)
|
||||
{
|
||||
void *next_node;
|
||||
procfs_list_t *procfs_list = cursor->procfs_list;
|
||||
|
||||
if (cursor->cached_node == SEQ_START_TOKEN)
|
||||
next_node = list_head(&procfs_list->pl_list);
|
||||
else
|
||||
next_node = list_next(&procfs_list->pl_list,
|
||||
cursor->cached_node);
|
||||
|
||||
if (next_node != NULL) {
|
||||
cursor->cached_node = next_node;
|
||||
cursor->cached_pos = NODE_ID(procfs_list, cursor->cached_node);
|
||||
*pos = cursor->cached_pos;
|
||||
}
|
||||
return (next_node);
|
||||
}
|
||||
|
||||
static void *
|
||||
procfs_list_seq_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
procfs_list_cursor_t *cursor = f->private;
|
||||
procfs_list_t *procfs_list = cursor->procfs_list;
|
||||
|
||||
mutex_enter(&procfs_list->pl_lock);
|
||||
|
||||
if (*pos == 0) {
|
||||
cursor->cached_node = SEQ_START_TOKEN;
|
||||
cursor->cached_pos = 0;
|
||||
return (SEQ_START_TOKEN);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if our cached pointer has become stale, which happens if the
|
||||
* the message where we left off has been dropped from the list since
|
||||
* the last read syscall completed.
|
||||
*/
|
||||
void *oldest_node = list_head(&procfs_list->pl_list);
|
||||
if (cursor->cached_node != SEQ_START_TOKEN && (oldest_node == NULL ||
|
||||
NODE_ID(procfs_list, oldest_node) > cursor->cached_pos))
|
||||
return (ERR_PTR(-EIO));
|
||||
|
||||
/*
|
||||
* If it isn't starting from the beginning of the file, the seq_file
|
||||
* code will either pick up at the same position it visited last or the
|
||||
* following one.
|
||||
*/
|
||||
if (*pos == cursor->cached_pos) {
|
||||
return (cursor->cached_node);
|
||||
} else {
|
||||
ASSERT3U(*pos, ==, cursor->cached_pos + 1);
|
||||
return (procfs_list_next_node(cursor, pos));
|
||||
}
|
||||
}
|
||||
|
||||
static void *
|
||||
procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos)
|
||||
{
|
||||
procfs_list_cursor_t *cursor = f->private;
|
||||
ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock));
|
||||
return (procfs_list_next_node(cursor, pos));
|
||||
}
|
||||
|
||||
static void
|
||||
procfs_list_seq_stop(struct seq_file *f, void *p)
|
||||
{
|
||||
procfs_list_cursor_t *cursor = f->private;
|
||||
procfs_list_t *procfs_list = cursor->procfs_list;
|
||||
mutex_exit(&procfs_list->pl_lock);
|
||||
}
|
||||
|
||||
static struct seq_operations procfs_list_seq_ops = {
|
||||
.show = procfs_list_seq_show,
|
||||
.start = procfs_list_seq_start,
|
||||
.next = procfs_list_seq_next,
|
||||
.stop = procfs_list_seq_stop,
|
||||
};
|
||||
|
||||
static int
|
||||
procfs_list_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
int rc = seq_open_private(filp, &procfs_list_seq_ops,
|
||||
sizeof (procfs_list_cursor_t));
|
||||
if (rc != 0)
|
||||
return (rc);
|
||||
|
||||
struct seq_file *f = filp->private_data;
|
||||
procfs_list_cursor_t *cursor = f->private;
|
||||
cursor->procfs_list = PDE_DATA(inode);
|
||||
cursor->cached_node = NULL;
|
||||
cursor->cached_pos = 0;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
procfs_list_write(struct file *filp, const char __user *buf, size_t len,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct seq_file *f = filp->private_data;
|
||||
procfs_list_cursor_t *cursor = f->private;
|
||||
procfs_list_t *procfs_list = cursor->procfs_list;
|
||||
int rc;
|
||||
|
||||
if (procfs_list->pl_clear != NULL &&
|
||||
(rc = procfs_list->pl_clear(procfs_list)) != 0)
|
||||
return (-rc);
|
||||
return (len);
|
||||
}
|
||||
|
||||
static struct file_operations procfs_list_operations = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = procfs_list_open,
|
||||
.write = procfs_list_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release_private,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize a procfs_list and create a file for it in the proc filesystem
|
||||
* under the kstat namespace.
|
||||
*/
|
||||
void
|
||||
procfs_list_install(const char *module,
|
||||
const char *name,
|
||||
mode_t mode,
|
||||
procfs_list_t *procfs_list,
|
||||
int (*show)(struct seq_file *f, void *p),
|
||||
int (*show_header)(struct seq_file *f),
|
||||
int (*clear)(procfs_list_t *procfs_list),
|
||||
size_t procfs_list_node_off)
|
||||
{
|
||||
mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&procfs_list->pl_list,
|
||||
procfs_list_node_off + sizeof (procfs_list_node_t),
|
||||
procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
|
||||
procfs_list->pl_next_id = 1; /* Save id 0 for SEQ_START_TOKEN */
|
||||
procfs_list->pl_show = show;
|
||||
procfs_list->pl_show_header = show_header;
|
||||
procfs_list->pl_clear = clear;
|
||||
procfs_list->pl_node_offset = procfs_list_node_off;
|
||||
|
||||
kstat_proc_entry_init(&procfs_list->pl_kstat_entry, module, name);
|
||||
kstat_proc_entry_install(&procfs_list->pl_kstat_entry, mode,
|
||||
&procfs_list_operations, procfs_list);
|
||||
}
|
||||
EXPORT_SYMBOL(procfs_list_install);
|
||||
|
||||
/* Remove the proc filesystem file corresponding to the given list */
|
||||
void
|
||||
procfs_list_uninstall(procfs_list_t *procfs_list)
|
||||
{
|
||||
kstat_proc_entry_delete(&procfs_list->pl_kstat_entry);
|
||||
}
|
||||
EXPORT_SYMBOL(procfs_list_uninstall);
|
||||
|
||||
void
|
||||
procfs_list_destroy(procfs_list_t *procfs_list)
|
||||
{
|
||||
ASSERT(list_is_empty(&procfs_list->pl_list));
|
||||
list_destroy(&procfs_list->pl_list);
|
||||
mutex_destroy(&procfs_list->pl_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(procfs_list_destroy);
|
||||
|
||||
/*
|
||||
* Add a new node to the tail of the list. While the standard list manipulation
|
||||
* functions can be use for all other operation, adding elements to the list
|
||||
* should only be done using this helper so that the id of the new node is set
|
||||
* correctly.
|
||||
*/
|
||||
void
|
||||
procfs_list_add(procfs_list_t *procfs_list, void *p)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
|
||||
NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
|
||||
list_insert_tail(&procfs_list->pl_list, p);
|
||||
}
|
||||
EXPORT_SYMBOL(procfs_list_add);
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,163 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Thread Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/thread.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/tsd.h>
|
||||
#include <sys/simd.h>
|
||||
|
||||
/*
|
||||
* Thread interfaces
|
||||
*/
|
||||
typedef struct thread_priv_s {
|
||||
unsigned long tp_magic; /* Magic */
|
||||
int tp_name_size; /* Name size */
|
||||
char *tp_name; /* Name (without _thread suffix) */
|
||||
void (*tp_func)(void *); /* Registered function */
|
||||
void *tp_args; /* Args to be passed to function */
|
||||
size_t tp_len; /* Len to be passed to function */
|
||||
int tp_state; /* State to start thread at */
|
||||
pri_t tp_pri; /* Priority to start threat at */
|
||||
} thread_priv_t;
|
||||
|
||||
static int
|
||||
thread_generic_wrapper(void *arg)
|
||||
{
|
||||
thread_priv_t *tp = (thread_priv_t *)arg;
|
||||
void (*func)(void *);
|
||||
void *args;
|
||||
|
||||
ASSERT(tp->tp_magic == TP_MAGIC);
|
||||
func = tp->tp_func;
|
||||
args = tp->tp_args;
|
||||
set_current_state(tp->tp_state);
|
||||
set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri));
|
||||
kfpu_initialize();
|
||||
kmem_free(tp->tp_name, tp->tp_name_size);
|
||||
kmem_free(tp, sizeof (thread_priv_t));
|
||||
|
||||
if (func)
|
||||
func(args);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
__thread_exit(void)
|
||||
{
|
||||
tsd_exit();
|
||||
complete_and_exit(NULL, 0);
|
||||
/* Unreachable */
|
||||
}
|
||||
EXPORT_SYMBOL(__thread_exit);
|
||||
|
||||
/*
|
||||
* thread_create() may block forever if it cannot create a thread or
|
||||
* allocate memory. This is preferable to returning a NULL which Solaris
|
||||
* style callers likely never check for... since it can't fail.
|
||||
*/
|
||||
kthread_t *
|
||||
__thread_create(caddr_t stk, size_t stksize, thread_func_t func,
|
||||
const char *name, void *args, size_t len, proc_t *pp, int state, pri_t pri)
|
||||
{
|
||||
thread_priv_t *tp;
|
||||
struct task_struct *tsk;
|
||||
char *p;
|
||||
|
||||
/* Option pp is simply ignored */
|
||||
/* Variable stack size unsupported */
|
||||
ASSERT(stk == NULL);
|
||||
|
||||
tp = kmem_alloc(sizeof (thread_priv_t), KM_PUSHPAGE);
|
||||
if (tp == NULL)
|
||||
return (NULL);
|
||||
|
||||
tp->tp_magic = TP_MAGIC;
|
||||
tp->tp_name_size = strlen(name) + 1;
|
||||
|
||||
tp->tp_name = kmem_alloc(tp->tp_name_size, KM_PUSHPAGE);
|
||||
if (tp->tp_name == NULL) {
|
||||
kmem_free(tp, sizeof (thread_priv_t));
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
strncpy(tp->tp_name, name, tp->tp_name_size);
|
||||
|
||||
/*
|
||||
* Strip trailing "_thread" from passed name which will be the func
|
||||
* name since the exposed API has no parameter for passing a name.
|
||||
*/
|
||||
p = strstr(tp->tp_name, "_thread");
|
||||
if (p)
|
||||
p[0] = '\0';
|
||||
|
||||
tp->tp_func = func;
|
||||
tp->tp_args = args;
|
||||
tp->tp_len = len;
|
||||
tp->tp_state = state;
|
||||
tp->tp_pri = pri;
|
||||
|
||||
tsk = spl_kthread_create(thread_generic_wrapper, (void *)tp,
|
||||
"%s", tp->tp_name);
|
||||
if (IS_ERR(tsk))
|
||||
return (NULL);
|
||||
|
||||
wake_up_process(tsk);
|
||||
return ((kthread_t *)tsk);
|
||||
}
|
||||
EXPORT_SYMBOL(__thread_create);
|
||||
|
||||
/*
|
||||
* spl_kthread_create - Wrapper providing pre-3.13 semantics for
|
||||
* kthread_create() in which it is not killable and less likely
|
||||
* to return -ENOMEM.
|
||||
*/
|
||||
struct task_struct *
|
||||
spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
va_list args;
|
||||
char name[TASK_COMM_LEN];
|
||||
|
||||
va_start(args, namefmt);
|
||||
vsnprintf(name, sizeof (name), namefmt, args);
|
||||
va_end(args);
|
||||
do {
|
||||
tsk = kthread_create(func, data, "%s", name);
|
||||
if (IS_ERR(tsk)) {
|
||||
if (signal_pending(current)) {
|
||||
clear_thread_flag(TIF_SIGPENDING);
|
||||
continue;
|
||||
}
|
||||
if (PTR_ERR(tsk) == -ENOMEM)
|
||||
continue;
|
||||
return (NULL);
|
||||
} else {
|
||||
return (tsk);
|
||||
}
|
||||
} while (1);
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kthread_create);
|
||||
@@ -0,0 +1,720 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Lawrence Livermore National Security, LLC.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Thread Specific Data Implementation.
|
||||
*
|
||||
* Thread specific data is implemented using a hash table, this avoids
|
||||
* the need to add a member to the task structure and allows maximum
|
||||
* portability between kernels. This implementation has been optimized
|
||||
* to keep the tsd_set() and tsd_get() times as small as possible.
|
||||
*
|
||||
* The majority of the entries in the hash table are for specific tsd
|
||||
* entries. These entries are hashed by the product of their key and
|
||||
* pid because by design the key and pid are guaranteed to be unique.
|
||||
* Their product also has the desirable property that it will be uniformly
|
||||
* distributed over the hash bins providing neither the pid nor key is zero.
|
||||
* Under linux the zero pid is always the init process and thus won't be
|
||||
* used, and this implementation is careful never to assign a zero key.
|
||||
* By default the hash table is sized to 512 bins which is expected to
|
||||
* be sufficient for light to moderate usage of thread specific data.
|
||||
*
|
||||
* The hash table contains two additional types of entries. The first
|
||||
* type of entry is called a 'key' entry and it is added to the hash during
|
||||
* tsd_create(). It is used to store the address of the destructor function
|
||||
* and it is used as an anchor point. All tsd entries which use the same
|
||||
* key will be linked to this entry. This is used during tsd_destroy() to
|
||||
* quickly call the destructor function for all tsd associated with the key.
|
||||
* The 'key' entry may be looked up with tsd_hash_search() by passing the
|
||||
* key you wish to lookup and DTOR_PID constant as the pid.
|
||||
*
|
||||
* The second type of entry is called a 'pid' entry and it is added to the
|
||||
* hash the first time a process sets a key. The 'pid' entry is also used
|
||||
* as an anchor and all tsd for the process will be linked to it. This
|
||||
* list is used during tsd_exit() to ensure all registered destructors
|
||||
* are run for the process. The 'pid' entry may be looked up with
|
||||
* tsd_hash_search() by passing the PID_KEY constant as the key, and
|
||||
* the process pid. Note that tsd_exit() is called by thread_exit()
|
||||
* so if you're using the Solaris thread API you should not need to call
|
||||
* tsd_exit() directly.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/thread.h>
|
||||
#include <sys/tsd.h>
|
||||
#include <linux/hash.h>
|
||||
|
||||
typedef struct tsd_hash_bin {
|
||||
spinlock_t hb_lock;
|
||||
struct hlist_head hb_head;
|
||||
} tsd_hash_bin_t;
|
||||
|
||||
typedef struct tsd_hash_table {
|
||||
spinlock_t ht_lock;
|
||||
uint_t ht_bits;
|
||||
uint_t ht_key;
|
||||
tsd_hash_bin_t *ht_bins;
|
||||
} tsd_hash_table_t;
|
||||
|
||||
typedef struct tsd_hash_entry {
|
||||
uint_t he_key;
|
||||
pid_t he_pid;
|
||||
dtor_func_t he_dtor;
|
||||
void *he_value;
|
||||
struct hlist_node he_list;
|
||||
struct list_head he_key_list;
|
||||
struct list_head he_pid_list;
|
||||
} tsd_hash_entry_t;
|
||||
|
||||
/* File-scope TSD hash table pointer; starts out NULL. */
static tsd_hash_table_t *tsd_hash_table = NULL;
|
||||
|
||||
|
||||
/*
|
||||
* tsd_hash_search - searches hash table for tsd_hash_entry
|
||||
* @table: hash table
|
||||
* @key: search key
|
||||
* @pid: search pid
|
||||
*/
|
||||
static tsd_hash_entry_t *
|
||||
tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
tsd_hash_entry_t *entry;
|
||||
tsd_hash_bin_t *bin;
|
||||
ulong_t hash;
|
||||
|
||||
hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
|
||||
bin = &table->ht_bins[hash];
|
||||
spin_lock(&bin->hb_lock);
|
||||
hlist_for_each(node, &bin->hb_head) {
|
||||
entry = list_entry(node, tsd_hash_entry_t, he_list);
|
||||
if ((entry->he_key == key) && (entry->he_pid == pid)) {
|
||||
spin_unlock(&bin->hb_lock);
|
||||
return (entry);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&bin->hb_lock);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_hash_dtor - call the destructor and free all entries on the list
|
||||
* @work: list of hash entries
|
||||
*
|
||||
* For a list of entries which have all already been removed from the
|
||||
* hash call their registered destructor then free the associated memory.
|
||||
*/
|
||||
static void
|
||||
tsd_hash_dtor(struct hlist_head *work)
|
||||
{
|
||||
tsd_hash_entry_t *entry;
|
||||
|
||||
while (!hlist_empty(work)) {
|
||||
entry = hlist_entry(work->first, tsd_hash_entry_t, he_list);
|
||||
hlist_del(&entry->he_list);
|
||||
|
||||
if (entry->he_dtor && entry->he_pid != DTOR_PID)
|
||||
entry->he_dtor(entry->he_value);
|
||||
|
||||
kmem_free(entry, sizeof (tsd_hash_entry_t));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_hash_add - adds an entry to hash table
|
||||
* @table: hash table
|
||||
* @key: search key
|
||||
* @pid: search pid
|
||||
*
|
||||
* The caller is responsible for ensuring the unique key/pid do not
|
||||
* already exist in the hash table. This possible because all entries
|
||||
* are thread specific thus a concurrent thread will never attempt to
|
||||
* add this key/pid. Because multiple bins must be checked to add
|
||||
* links to the dtor and pid entries the entire table is locked.
|
||||
*/
|
||||
static int
|
||||
tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
|
||||
{
|
||||
tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
|
||||
tsd_hash_bin_t *bin;
|
||||
ulong_t hash;
|
||||
int rc = 0;
|
||||
|
||||
ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
|
||||
|
||||
/* New entry allocate structure, set value, and add to hash */
|
||||
entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
|
||||
if (entry == NULL)
|
||||
return (ENOMEM);
|
||||
|
||||
entry->he_key = key;
|
||||
entry->he_pid = pid;
|
||||
entry->he_value = value;
|
||||
INIT_HLIST_NODE(&entry->he_list);
|
||||
INIT_LIST_HEAD(&entry->he_key_list);
|
||||
INIT_LIST_HEAD(&entry->he_pid_list);
|
||||
|
||||
spin_lock(&table->ht_lock);
|
||||
|
||||
/* Destructor entry must exist for all valid keys */
|
||||
dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
|
||||
ASSERT3P(dtor_entry, !=, NULL);
|
||||
entry->he_dtor = dtor_entry->he_dtor;
|
||||
|
||||
/* Process entry must exist for all valid processes */
|
||||
pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
|
||||
ASSERT3P(pid_entry, !=, NULL);
|
||||
|
||||
hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
|
||||
bin = &table->ht_bins[hash];
|
||||
spin_lock(&bin->hb_lock);
|
||||
|
||||
/* Add to the hash, key, and pid lists */
|
||||
hlist_add_head(&entry->he_list, &bin->hb_head);
|
||||
list_add(&entry->he_key_list, &dtor_entry->he_key_list);
|
||||
list_add(&entry->he_pid_list, &pid_entry->he_pid_list);
|
||||
|
||||
spin_unlock(&bin->hb_lock);
|
||||
spin_unlock(&table->ht_lock);
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_hash_add_key - adds a destructor entry to the hash table
|
||||
* @table: hash table
|
||||
* @keyp: search key
|
||||
* @dtor: key destructor
|
||||
*
|
||||
* For every unique key there is a single entry in the hash which is used
|
||||
* as anchor. All other thread specific entries for this key are linked
|
||||
* to this anchor via the 'he_key_list' list head. On return they keyp
|
||||
* will be set to the next available key for the hash table.
|
||||
*/
|
||||
static int
|
||||
tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
|
||||
{
|
||||
tsd_hash_entry_t *tmp_entry, *entry;
|
||||
tsd_hash_bin_t *bin;
|
||||
ulong_t hash;
|
||||
int keys_checked = 0;
|
||||
|
||||
ASSERT3P(table, !=, NULL);
|
||||
|
||||
/* Allocate entry to be used as a destructor for this key */
|
||||
entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
|
||||
if (entry == NULL)
|
||||
return (ENOMEM);
|
||||
|
||||
/* Determine next available key value */
|
||||
spin_lock(&table->ht_lock);
|
||||
do {
|
||||
/* Limited to TSD_KEYS_MAX concurrent unique keys */
|
||||
if (table->ht_key++ > TSD_KEYS_MAX)
|
||||
table->ht_key = 1;
|
||||
|
||||
/* Ensure failure when all TSD_KEYS_MAX keys are in use */
|
||||
if (keys_checked++ >= TSD_KEYS_MAX) {
|
||||
spin_unlock(&table->ht_lock);
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
|
||||
} while (tmp_entry);
|
||||
|
||||
/* Add destructor entry in to hash table */
|
||||
entry->he_key = *keyp = table->ht_key;
|
||||
entry->he_pid = DTOR_PID;
|
||||
entry->he_dtor = dtor;
|
||||
entry->he_value = NULL;
|
||||
INIT_HLIST_NODE(&entry->he_list);
|
||||
INIT_LIST_HEAD(&entry->he_key_list);
|
||||
INIT_LIST_HEAD(&entry->he_pid_list);
|
||||
|
||||
hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
|
||||
bin = &table->ht_bins[hash];
|
||||
spin_lock(&bin->hb_lock);
|
||||
|
||||
hlist_add_head(&entry->he_list, &bin->hb_head);
|
||||
|
||||
spin_unlock(&bin->hb_lock);
|
||||
spin_unlock(&table->ht_lock);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_hash_add_pid - adds a process entry to the hash table
|
||||
* @table: hash table
|
||||
* @pid: search pid
|
||||
*
|
||||
* For every process there is a single entry in the hash which is used
|
||||
* as anchor. All other thread specific entries for this process are
|
||||
* linked to this anchor via the 'he_pid_list' list head.
|
||||
*/
|
||||
static int
|
||||
tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
|
||||
{
|
||||
tsd_hash_entry_t *entry;
|
||||
tsd_hash_bin_t *bin;
|
||||
ulong_t hash;
|
||||
|
||||
/* Allocate entry to be used as the process reference */
|
||||
entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
|
||||
if (entry == NULL)
|
||||
return (ENOMEM);
|
||||
|
||||
spin_lock(&table->ht_lock);
|
||||
entry->he_key = PID_KEY;
|
||||
entry->he_pid = pid;
|
||||
entry->he_dtor = NULL;
|
||||
entry->he_value = NULL;
|
||||
INIT_HLIST_NODE(&entry->he_list);
|
||||
INIT_LIST_HEAD(&entry->he_key_list);
|
||||
INIT_LIST_HEAD(&entry->he_pid_list);
|
||||
|
||||
hash = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
|
||||
bin = &table->ht_bins[hash];
|
||||
spin_lock(&bin->hb_lock);
|
||||
|
||||
hlist_add_head(&entry->he_list, &bin->hb_head);
|
||||
|
||||
spin_unlock(&bin->hb_lock);
|
||||
spin_unlock(&table->ht_lock);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_hash_del - delete an entry from hash table, key, and pid lists
|
||||
* @table: hash table
|
||||
* @entry: entry to unlink from its hash bin, key list, and pid list
|
||||
*/
|
||||
static void
|
||||
tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
|
||||
{
|
||||
hlist_del(&entry->he_list);
|
||||
list_del_init(&entry->he_key_list);
|
||||
list_del_init(&entry->he_pid_list);
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_hash_table_init - allocate a hash table
|
||||
* @bits: hash table size
|
||||
*
|
||||
* A hash table with 2^bits bins will be created, it may not be resized
|
||||
* after the fact and must be free'd with tsd_hash_table_fini().
|
||||
*/
|
||||
static tsd_hash_table_t *
|
||||
tsd_hash_table_init(uint_t bits)
|
||||
{
|
||||
tsd_hash_table_t *table;
|
||||
int hash, size = (1 << bits);
|
||||
|
||||
table = kmem_zalloc(sizeof (tsd_hash_table_t), KM_SLEEP);
|
||||
if (table == NULL)
|
||||
return (NULL);
|
||||
|
||||
table->ht_bins = kmem_zalloc(sizeof (tsd_hash_bin_t) * size, KM_SLEEP);
|
||||
if (table->ht_bins == NULL) {
|
||||
kmem_free(table, sizeof (tsd_hash_table_t));
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
for (hash = 0; hash < size; hash++) {
|
||||
spin_lock_init(&table->ht_bins[hash].hb_lock);
|
||||
INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
|
||||
}
|
||||
|
||||
spin_lock_init(&table->ht_lock);
|
||||
table->ht_bits = bits;
|
||||
table->ht_key = 1;
|
||||
|
||||
return (table);
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_hash_table_fini - free a hash table
|
||||
* @table: hash table
|
||||
*
|
||||
* Free a hash table allocated by tsd_hash_table_init(). If the hash
|
||||
* table is not empty this function will call the proper destructor for
|
||||
* all remaining entries before freeing the memory used by those entries.
|
||||
*/
|
||||
static void
|
||||
tsd_hash_table_fini(tsd_hash_table_t *table)
|
||||
{
|
||||
HLIST_HEAD(work);
|
||||
tsd_hash_bin_t *bin;
|
||||
tsd_hash_entry_t *entry;
|
||||
int size, i;
|
||||
|
||||
ASSERT3P(table, !=, NULL);
|
||||
spin_lock(&table->ht_lock);
|
||||
for (i = 0, size = (1 << table->ht_bits); i < size; i++) {
|
||||
bin = &table->ht_bins[i];
|
||||
spin_lock(&bin->hb_lock);
|
||||
while (!hlist_empty(&bin->hb_head)) {
|
||||
entry = hlist_entry(bin->hb_head.first,
|
||||
tsd_hash_entry_t, he_list);
|
||||
tsd_hash_del(table, entry);
|
||||
hlist_add_head(&entry->he_list, &work);
|
||||
}
|
||||
spin_unlock(&bin->hb_lock);
|
||||
}
|
||||
spin_unlock(&table->ht_lock);
|
||||
|
||||
tsd_hash_dtor(&work);
|
||||
kmem_free(table->ht_bins, sizeof (tsd_hash_bin_t)*(1<<table->ht_bits));
|
||||
kmem_free(table, sizeof (tsd_hash_table_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_remove_entry - remove a tsd entry for this thread
|
||||
* @entry: entry to remove
|
||||
*
|
||||
* Remove the thread specific data @entry for this thread.
|
||||
* If this is the last entry for this thread, also remove the PID entry.
|
||||
*/
|
||||
static void
|
||||
tsd_remove_entry(tsd_hash_entry_t *entry)
|
||||
{
|
||||
HLIST_HEAD(work);
|
||||
tsd_hash_table_t *table;
|
||||
tsd_hash_entry_t *pid_entry;
|
||||
tsd_hash_bin_t *pid_entry_bin, *entry_bin;
|
||||
ulong_t hash;
|
||||
|
||||
table = tsd_hash_table;
|
||||
ASSERT3P(table, !=, NULL);
|
||||
ASSERT3P(entry, !=, NULL);
|
||||
|
||||
spin_lock(&table->ht_lock);
|
||||
|
||||
hash = hash_long((ulong_t)entry->he_key *
|
||||
(ulong_t)entry->he_pid, table->ht_bits);
|
||||
entry_bin = &table->ht_bins[hash];
|
||||
|
||||
/* save the possible pid_entry */
|
||||
pid_entry = list_entry(entry->he_pid_list.next, tsd_hash_entry_t,
|
||||
he_pid_list);
|
||||
|
||||
/* remove entry */
|
||||
spin_lock(&entry_bin->hb_lock);
|
||||
tsd_hash_del(table, entry);
|
||||
hlist_add_head(&entry->he_list, &work);
|
||||
spin_unlock(&entry_bin->hb_lock);
|
||||
|
||||
/* if pid_entry is indeed pid_entry, then remove it if it's empty */
|
||||
if (pid_entry->he_key == PID_KEY &&
|
||||
list_empty(&pid_entry->he_pid_list)) {
|
||||
hash = hash_long((ulong_t)pid_entry->he_key *
|
||||
(ulong_t)pid_entry->he_pid, table->ht_bits);
|
||||
pid_entry_bin = &table->ht_bins[hash];
|
||||
|
||||
spin_lock(&pid_entry_bin->hb_lock);
|
||||
tsd_hash_del(table, pid_entry);
|
||||
hlist_add_head(&pid_entry->he_list, &work);
|
||||
spin_unlock(&pid_entry_bin->hb_lock);
|
||||
}
|
||||
|
||||
spin_unlock(&table->ht_lock);
|
||||
|
||||
tsd_hash_dtor(&work);
|
||||
}
|
||||
|
||||
/*
|
||||
* tsd_set - set thread specific data
|
||||
* @key: lookup key
|
||||
* @value: value to set
|
||||
*
|
||||
* Caller must prevent racing tsd_create() or tsd_destroy(), protected
|
||||
* from racing tsd_get() or tsd_set() because it is thread specific.
|
||||
* This function has been optimized to be fast for the update case.
|
||||
* When setting the tsd initially it will be slower due to additional
|
||||
* required locking and potential memory allocations.
|
||||
*/
|
||||
int
|
||||
tsd_set(uint_t key, void *value)
|
||||
{
|
||||
tsd_hash_table_t *table;
|
||||
tsd_hash_entry_t *entry;
|
||||
pid_t pid;
|
||||
int rc;
|
||||
/* mark remove if value is NULL */
|
||||
boolean_t remove = (value == NULL);
|
||||
|
||||
table = tsd_hash_table;
|
||||
pid = curthread->pid;
|
||||
ASSERT3P(table, !=, NULL);
|
||||
|
||||
if ((key == 0) || (key > TSD_KEYS_MAX))
|
||||
return (EINVAL);
|
||||
|
||||
/* Entry already exists in hash table update value */
|
||||
entry = tsd_hash_search(table, key, pid);
|
||||
if (entry) {
|
||||
entry->he_value = value;
|
||||
/* remove the entry */
|
||||
if (remove)
|
||||
tsd_remove_entry(entry);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* don't create entry if value is NULL */
|
||||
if (remove)
|
||||
return (0);
|
||||
|
||||
/* Add a process entry to the hash if not yet exists */
|
||||
entry = tsd_hash_search(table, PID_KEY, pid);
|
||||
if (entry == NULL) {
|
||||
rc = tsd_hash_add_pid(table, pid);
|
||||
if (rc)
|
||||
return (rc);
|
||||
}
|
||||
|
||||
rc = tsd_hash_add(table, key, pid, value);
|
||||
return (rc);
|
||||
}
|
||||
EXPORT_SYMBOL(tsd_set);
|
||||
|
||||
/*
|
||||
* tsd_get - get thread specific data
|
||||
* @key: lookup key
|
||||
*
|
||||
* Caller must prevent racing tsd_create() or tsd_destroy(). This
|
||||
* implementation is designed to be fast and scalable, it does not
|
||||
* lock the entire table only a single hash bin.
|
||||
*/
|
||||
void *
|
||||
tsd_get(uint_t key)
|
||||
{
|
||||
tsd_hash_entry_t *entry;
|
||||
|
||||
ASSERT3P(tsd_hash_table, !=, NULL);
|
||||
|
||||
if ((key == 0) || (key > TSD_KEYS_MAX))
|
||||
return (NULL);
|
||||
|
||||
entry = tsd_hash_search(tsd_hash_table, key, curthread->pid);
|
||||
if (entry == NULL)
|
||||
return (NULL);
|
||||
|
||||
return (entry->he_value);
|
||||
}
|
||||
EXPORT_SYMBOL(tsd_get);
|
||||
|
||||
/*
|
||||
* tsd_get_by_thread - get thread specific data for specified thread
|
||||
* @key: lookup key
|
||||
* @thread: thread to lookup
|
||||
*
|
||||
* Caller must prevent racing tsd_create() or tsd_destroy(). This
|
||||
* implementation is designed to be fast and scalable, it does not
|
||||
* lock the entire table only a single hash bin.
|
||||
*/
|
||||
void *
|
||||
tsd_get_by_thread(uint_t key, kthread_t *thread)
|
||||
{
|
||||
tsd_hash_entry_t *entry;
|
||||
|
||||
ASSERT3P(tsd_hash_table, !=, NULL);
|
||||
|
||||
if ((key == 0) || (key > TSD_KEYS_MAX))
|
||||
return (NULL);
|
||||
|
||||
entry = tsd_hash_search(tsd_hash_table, key, thread->pid);
|
||||
if (entry == NULL)
|
||||
return (NULL);
|
||||
|
||||
return (entry->he_value);
|
||||
}
|
||||
EXPORT_SYMBOL(tsd_get_by_thread);
|
||||
|
||||
/*
|
||||
* tsd_create - create thread specific data key
|
||||
* @keyp: lookup key address
|
||||
* @dtor: destructor called during tsd_destroy() or tsd_exit()
|
||||
*
|
||||
* Provided key must be set to 0 or it is assumed to be already in use.
|
||||
* The dtor is allowed to be NULL in which case no additional cleanup
|
||||
* for the data is performed during tsd_destroy() or tsd_exit().
|
||||
*
|
||||
* Caller must prevent racing tsd_set() or tsd_get(), this function is
|
||||
* safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
|
||||
*/
|
||||
void
|
||||
tsd_create(uint_t *keyp, dtor_func_t dtor)
|
||||
{
|
||||
ASSERT3P(keyp, !=, NULL);
|
||||
if (*keyp)
|
||||
return;
|
||||
|
||||
(void) tsd_hash_add_key(tsd_hash_table, keyp, dtor);
|
||||
}
|
||||
EXPORT_SYMBOL(tsd_create);
|
||||
|
||||
/*
|
||||
* tsd_destroy - destroy thread specific data
|
||||
* @keyp: lookup key address
|
||||
*
|
||||
* Destroys the thread specific data on all threads which use this key.
|
||||
*
|
||||
* Caller must prevent racing tsd_set() or tsd_get(), this function is
|
||||
* safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
|
||||
*/
|
||||
void
|
||||
tsd_destroy(uint_t *keyp)
|
||||
{
|
||||
HLIST_HEAD(work);
|
||||
tsd_hash_table_t *table;
|
||||
tsd_hash_entry_t *dtor_entry, *entry;
|
||||
tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
|
||||
ulong_t hash;
|
||||
|
||||
table = tsd_hash_table;
|
||||
ASSERT3P(table, !=, NULL);
|
||||
|
||||
spin_lock(&table->ht_lock);
|
||||
dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
|
||||
if (dtor_entry == NULL) {
|
||||
spin_unlock(&table->ht_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* All threads which use this key must be linked off of the
|
||||
* DTOR_PID entry. They are removed from the hash table and
|
||||
* linked in to a private working list to be destroyed.
|
||||
*/
|
||||
while (!list_empty(&dtor_entry->he_key_list)) {
|
||||
entry = list_entry(dtor_entry->he_key_list.next,
|
||||
tsd_hash_entry_t, he_key_list);
|
||||
ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
|
||||
ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
|
||||
|
||||
hash = hash_long((ulong_t)entry->he_key *
|
||||
(ulong_t)entry->he_pid, table->ht_bits);
|
||||
entry_bin = &table->ht_bins[hash];
|
||||
|
||||
spin_lock(&entry_bin->hb_lock);
|
||||
tsd_hash_del(table, entry);
|
||||
hlist_add_head(&entry->he_list, &work);
|
||||
spin_unlock(&entry_bin->hb_lock);
|
||||
}
|
||||
|
||||
hash = hash_long((ulong_t)dtor_entry->he_key *
|
||||
(ulong_t)dtor_entry->he_pid, table->ht_bits);
|
||||
dtor_entry_bin = &table->ht_bins[hash];
|
||||
|
||||
spin_lock(&dtor_entry_bin->hb_lock);
|
||||
tsd_hash_del(table, dtor_entry);
|
||||
hlist_add_head(&dtor_entry->he_list, &work);
|
||||
spin_unlock(&dtor_entry_bin->hb_lock);
|
||||
spin_unlock(&table->ht_lock);
|
||||
|
||||
tsd_hash_dtor(&work);
|
||||
*keyp = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(tsd_destroy);
|
||||
|
||||
/*
|
||||
* tsd_exit - destroys all thread specific data for this thread
|
||||
*
|
||||
* Destroys all the thread specific data for this thread.
|
||||
*
|
||||
* Caller must prevent racing tsd_set() or tsd_get(), this function is
|
||||
* safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
|
||||
*/
|
||||
void
|
||||
tsd_exit(void)
|
||||
{
|
||||
HLIST_HEAD(work);
|
||||
tsd_hash_table_t *table;
|
||||
tsd_hash_entry_t *pid_entry, *entry;
|
||||
tsd_hash_bin_t *pid_entry_bin, *entry_bin;
|
||||
ulong_t hash;
|
||||
|
||||
table = tsd_hash_table;
|
||||
ASSERT3P(table, !=, NULL);
|
||||
|
||||
spin_lock(&table->ht_lock);
|
||||
pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
|
||||
if (pid_entry == NULL) {
|
||||
spin_unlock(&table->ht_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* All keys associated with this pid must be linked off of the
|
||||
* PID_KEY entry. They are removed from the hash table and
|
||||
* linked in to a private working list to be destroyed.
|
||||
*/
|
||||
|
||||
while (!list_empty(&pid_entry->he_pid_list)) {
|
||||
entry = list_entry(pid_entry->he_pid_list.next,
|
||||
tsd_hash_entry_t, he_pid_list);
|
||||
ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
|
||||
|
||||
hash = hash_long((ulong_t)entry->he_key *
|
||||
(ulong_t)entry->he_pid, table->ht_bits);
|
||||
entry_bin = &table->ht_bins[hash];
|
||||
|
||||
spin_lock(&entry_bin->hb_lock);
|
||||
tsd_hash_del(table, entry);
|
||||
hlist_add_head(&entry->he_list, &work);
|
||||
spin_unlock(&entry_bin->hb_lock);
|
||||
}
|
||||
|
||||
hash = hash_long((ulong_t)pid_entry->he_key *
|
||||
(ulong_t)pid_entry->he_pid, table->ht_bits);
|
||||
pid_entry_bin = &table->ht_bins[hash];
|
||||
|
||||
spin_lock(&pid_entry_bin->hb_lock);
|
||||
tsd_hash_del(table, pid_entry);
|
||||
hlist_add_head(&pid_entry->he_list, &work);
|
||||
spin_unlock(&pid_entry_bin->hb_lock);
|
||||
spin_unlock(&table->ht_lock);
|
||||
|
||||
tsd_hash_dtor(&work);
|
||||
}
|
||||
EXPORT_SYMBOL(tsd_exit);
|
||||
|
||||
int
|
||||
spl_tsd_init(void)
|
||||
{
|
||||
tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
|
||||
if (tsd_hash_table == NULL)
|
||||
return (1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
spl_tsd_fini(void)
|
||||
{
|
||||
tsd_hash_table_fini(tsd_hash_table);
|
||||
tsd_hash_table = NULL;
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <sys/debug.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/shrinker.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
vmem_t *heap_arena = NULL;
|
||||
EXPORT_SYMBOL(heap_arena);
|
||||
|
||||
vmem_t *zio_alloc_arena = NULL;
|
||||
EXPORT_SYMBOL(zio_alloc_arena);
|
||||
|
||||
vmem_t *zio_arena = NULL;
|
||||
EXPORT_SYMBOL(zio_arena);
|
||||
|
||||
#define VMEM_FLOOR_SIZE (4 * 1024 * 1024) /* 4MB floor */
|
||||
|
||||
/*
|
||||
* Return approximate virtual memory usage based on these assumptions:
|
||||
*
|
||||
* 1) The major SPL consumer of virtual memory is the kmem cache.
|
||||
* 2) Memory allocated with vmem_alloc() is short lived and can be ignored.
|
||||
* 3) Allow a 4MB floor as a generous pad given normal consumption.
|
||||
* 4) The spl_kmem_cache_sem only contends with cache create/destroy.
|
||||
*/
|
||||
size_t
|
||||
vmem_size(vmem_t *vmp, int typemask)
|
||||
{
|
||||
spl_kmem_cache_t *skc;
|
||||
size_t alloc = VMEM_FLOOR_SIZE;
|
||||
|
||||
if ((typemask & VMEM_ALLOC) && (typemask & VMEM_FREE))
|
||||
return (VMALLOC_TOTAL);
|
||||
|
||||
|
||||
down_read(&spl_kmem_cache_sem);
|
||||
list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
|
||||
if (skc->skc_flags & KMC_VMEM)
|
||||
alloc += skc->skc_slab_size * skc->skc_slab_total;
|
||||
}
|
||||
up_read(&spl_kmem_cache_sem);
|
||||
|
||||
if (typemask & VMEM_ALLOC)
|
||||
return (MIN(alloc, VMALLOC_TOTAL));
|
||||
else if (typemask & VMEM_FREE)
|
||||
return (MAX(VMALLOC_TOTAL - alloc, 0));
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
EXPORT_SYMBOL(vmem_size);
|
||||
|
||||
/*
|
||||
* Public vmem_alloc(), vmem_zalloc() and vmem_free() interfaces.
|
||||
*/
|
||||
void *
|
||||
spl_vmem_alloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
flags |= KM_VMEM;
|
||||
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_vmem_alloc);
|
||||
|
||||
void *
|
||||
spl_vmem_zalloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
flags |= (KM_VMEM | KM_ZERO);
|
||||
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_vmem_zalloc);
|
||||
|
||||
void
|
||||
spl_vmem_free(const void *buf, size_t size)
|
||||
{
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_free_impl(buf, size));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_free_debug(buf, size));
|
||||
#else
|
||||
return (spl_kmem_free_track(buf, size));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_vmem_free);
|
||||
|
||||
int
spl_vmem_init(void)
{
	/* Nothing to set up, always reports success */
	return (0);
}
|
||||
|
||||
void
spl_vmem_fini(void)
{
	/* Nothing to tear down */
}
|
||||
@@ -0,0 +1,719 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Vnode Implementation.
|
||||
*/
|
||||
|
||||
#include <sys/cred.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <linux/falloc.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uaccess.h>
|
||||
#ifdef HAVE_FDTABLE_HEADER
|
||||
#include <linux/fdtable.h>
|
||||
#endif
|
||||
|
||||
vnode_t *rootdir = (vnode_t *)0xabcd1234;
|
||||
EXPORT_SYMBOL(rootdir);
|
||||
|
||||
static spl_kmem_cache_t *vn_cache;
|
||||
static spl_kmem_cache_t *vn_file_cache;
|
||||
|
||||
static spinlock_t vn_file_lock;
|
||||
static LIST_HEAD(vn_file_list);
|
||||
|
||||
static int
|
||||
spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len)
|
||||
{
|
||||
int error = -EOPNOTSUPP;
|
||||
|
||||
#ifdef HAVE_FILE_FALLOCATE
|
||||
if (fp->f_op->fallocate)
|
||||
error = fp->f_op->fallocate(fp, mode, offset, len);
|
||||
#else
|
||||
#ifdef HAVE_INODE_FALLOCATE
|
||||
if (fp->f_dentry && fp->f_dentry->d_inode &&
|
||||
fp->f_dentry->d_inode->i_op->fallocate)
|
||||
error = fp->f_dentry->d_inode->i_op->fallocate(
|
||||
fp->f_dentry->d_inode, mode, offset, len);
|
||||
#endif /* HAVE_INODE_FALLOCATE */
|
||||
#endif /* HAVE_FILE_FALLOCATE */
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
spl_filp_fsync(struct file *fp, int sync)
|
||||
{
|
||||
#ifdef HAVE_2ARGS_VFS_FSYNC
|
||||
return (vfs_fsync(fp, sync));
|
||||
#else
|
||||
return (vfs_fsync(fp, (fp)->f_dentry, sync));
|
||||
#endif /* HAVE_2ARGS_VFS_FSYNC */
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
|
||||
{
|
||||
#if defined(HAVE_KERNEL_WRITE_PPOS)
|
||||
return (kernel_write(file, buf, count, pos));
|
||||
#else
|
||||
mm_segment_t saved_fs;
|
||||
ssize_t ret;
|
||||
|
||||
saved_fs = get_fs();
|
||||
set_fs(KERNEL_DS);
|
||||
|
||||
ret = vfs_write(file, (__force const char __user *)buf, count, pos);
|
||||
|
||||
set_fs(saved_fs);
|
||||
|
||||
return (ret);
|
||||
#endif
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
|
||||
{
|
||||
#if defined(HAVE_KERNEL_READ_PPOS)
|
||||
return (kernel_read(file, buf, count, pos));
|
||||
#else
|
||||
mm_segment_t saved_fs;
|
||||
ssize_t ret;
|
||||
|
||||
saved_fs = get_fs();
|
||||
set_fs(KERNEL_DS);
|
||||
|
||||
ret = vfs_read(file, (void __user *)buf, count, pos);
|
||||
|
||||
set_fs(saved_fs);
|
||||
|
||||
return (ret);
|
||||
#endif
|
||||
}
|
||||
|
||||
vtype_t
|
||||
vn_mode_to_vtype(mode_t mode)
|
||||
{
|
||||
if (S_ISREG(mode))
|
||||
return (VREG);
|
||||
|
||||
if (S_ISDIR(mode))
|
||||
return (VDIR);
|
||||
|
||||
if (S_ISCHR(mode))
|
||||
return (VCHR);
|
||||
|
||||
if (S_ISBLK(mode))
|
||||
return (VBLK);
|
||||
|
||||
if (S_ISFIFO(mode))
|
||||
return (VFIFO);
|
||||
|
||||
if (S_ISLNK(mode))
|
||||
return (VLNK);
|
||||
|
||||
if (S_ISSOCK(mode))
|
||||
return (VSOCK);
|
||||
|
||||
return (VNON);
|
||||
} /* vn_mode_to_vtype() */
|
||||
EXPORT_SYMBOL(vn_mode_to_vtype);
|
||||
|
||||
mode_t
|
||||
vn_vtype_to_mode(vtype_t vtype)
|
||||
{
|
||||
if (vtype == VREG)
|
||||
return (S_IFREG);
|
||||
|
||||
if (vtype == VDIR)
|
||||
return (S_IFDIR);
|
||||
|
||||
if (vtype == VCHR)
|
||||
return (S_IFCHR);
|
||||
|
||||
if (vtype == VBLK)
|
||||
return (S_IFBLK);
|
||||
|
||||
if (vtype == VFIFO)
|
||||
return (S_IFIFO);
|
||||
|
||||
if (vtype == VLNK)
|
||||
return (S_IFLNK);
|
||||
|
||||
if (vtype == VSOCK)
|
||||
return (S_IFSOCK);
|
||||
|
||||
return (VNON);
|
||||
} /* vn_vtype_to_mode() */
|
||||
EXPORT_SYMBOL(vn_vtype_to_mode);
|
||||
|
||||
vnode_t *
|
||||
vn_alloc(int flag)
|
||||
{
|
||||
vnode_t *vp;
|
||||
|
||||
vp = kmem_cache_alloc(vn_cache, flag);
|
||||
if (vp != NULL) {
|
||||
vp->v_file = NULL;
|
||||
vp->v_type = 0;
|
||||
}
|
||||
|
||||
return (vp);
|
||||
} /* vn_alloc() */
|
||||
EXPORT_SYMBOL(vn_alloc);
|
||||
|
||||
void
|
||||
vn_free(vnode_t *vp)
|
||||
{
|
||||
kmem_cache_free(vn_cache, vp);
|
||||
} /* vn_free() */
|
||||
EXPORT_SYMBOL(vn_free);
|
||||
|
||||
int
|
||||
vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp,
|
||||
int x1, void *x2)
|
||||
{
|
||||
struct file *fp;
|
||||
struct kstat stat;
|
||||
int rc, saved_umask = 0;
|
||||
gfp_t saved_gfp;
|
||||
vnode_t *vp;
|
||||
|
||||
ASSERT(flags & (FWRITE | FREAD));
|
||||
ASSERT(seg == UIO_SYSSPACE);
|
||||
ASSERT(vpp);
|
||||
*vpp = NULL;
|
||||
|
||||
if (!(flags & FCREAT) && (flags & FWRITE))
|
||||
flags |= FEXCL;
|
||||
|
||||
/*
|
||||
* Note for filp_open() the two low bits must be remapped to mean:
|
||||
* 01 - read-only -> 00 read-only
|
||||
* 10 - write-only -> 01 write-only
|
||||
* 11 - read-write -> 10 read-write
|
||||
*/
|
||||
flags--;
|
||||
|
||||
if (flags & FCREAT)
|
||||
saved_umask = xchg(¤t->fs->umask, 0);
|
||||
|
||||
fp = filp_open(path, flags, mode);
|
||||
|
||||
if (flags & FCREAT)
|
||||
(void) xchg(¤t->fs->umask, saved_umask);
|
||||
|
||||
if (IS_ERR(fp))
|
||||
return (-PTR_ERR(fp));
|
||||
|
||||
#if defined(HAVE_4ARGS_VFS_GETATTR)
|
||||
rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
|
||||
#elif defined(HAVE_2ARGS_VFS_GETATTR)
|
||||
rc = vfs_getattr(&fp->f_path, &stat);
|
||||
#else
|
||||
rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
|
||||
#endif
|
||||
if (rc) {
|
||||
filp_close(fp, 0);
|
||||
return (-rc);
|
||||
}
|
||||
|
||||
vp = vn_alloc(KM_SLEEP);
|
||||
if (!vp) {
|
||||
filp_close(fp, 0);
|
||||
return (ENOMEM);
|
||||
}
|
||||
|
||||
saved_gfp = mapping_gfp_mask(fp->f_mapping);
|
||||
mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));
|
||||
|
||||
mutex_enter(&vp->v_lock);
|
||||
vp->v_type = vn_mode_to_vtype(stat.mode);
|
||||
vp->v_file = fp;
|
||||
vp->v_gfp_mask = saved_gfp;
|
||||
*vpp = vp;
|
||||
mutex_exit(&vp->v_lock);
|
||||
|
||||
return (0);
|
||||
} /* vn_open() */
|
||||
EXPORT_SYMBOL(vn_open);
|
||||
|
||||
int
|
||||
vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
|
||||
vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
|
||||
{
|
||||
char *realpath;
|
||||
int len, rc;
|
||||
|
||||
ASSERT(vp == rootdir);
|
||||
|
||||
len = strlen(path) + 2;
|
||||
realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP));
|
||||
if (!realpath)
|
||||
return (ENOMEM);
|
||||
|
||||
(void) snprintf(realpath, len, "/%s", path);
|
||||
rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2);
|
||||
kfree(realpath);
|
||||
|
||||
return (rc);
|
||||
} /* vn_openat() */
|
||||
EXPORT_SYMBOL(vn_openat);
|
||||
|
||||
int
|
||||
vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
|
||||
uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
|
||||
{
|
||||
struct file *fp = vp->v_file;
|
||||
loff_t offset = off;
|
||||
int rc;
|
||||
|
||||
ASSERT(uio == UIO_WRITE || uio == UIO_READ);
|
||||
ASSERT(seg == UIO_SYSSPACE);
|
||||
ASSERT((ioflag & ~FAPPEND) == 0);
|
||||
|
||||
if (ioflag & FAPPEND)
|
||||
offset = fp->f_pos;
|
||||
|
||||
if (uio & UIO_WRITE)
|
||||
rc = spl_kernel_write(fp, addr, len, &offset);
|
||||
else
|
||||
rc = spl_kernel_read(fp, addr, len, &offset);
|
||||
|
||||
fp->f_pos = offset;
|
||||
|
||||
if (rc < 0)
|
||||
return (-rc);
|
||||
|
||||
if (residp) {
|
||||
*residp = len - rc;
|
||||
} else {
|
||||
if (rc != len)
|
||||
return (EIO);
|
||||
}
|
||||
|
||||
return (0);
|
||||
} /* vn_rdwr() */
|
||||
EXPORT_SYMBOL(vn_rdwr);
|
||||
|
||||
int
|
||||
vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
|
||||
{
|
||||
int rc;
|
||||
|
||||
ASSERT(vp);
|
||||
ASSERT(vp->v_file);
|
||||
|
||||
mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask);
|
||||
rc = filp_close(vp->v_file, 0);
|
||||
vn_free(vp);
|
||||
|
||||
return (-rc);
|
||||
} /* vn_close() */
|
||||
EXPORT_SYMBOL(vn_close);
|
||||
|
||||
/*
|
||||
* vn_seek() does not actually seek it only performs bounds checking on the
|
||||
* proposed seek. We perform minimal checking and allow vn_rdwr() to catch
|
||||
* anything more serious.
|
||||
*/
|
||||
int
|
||||
vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
|
||||
{
|
||||
return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
|
||||
}
|
||||
EXPORT_SYMBOL(vn_seek);
|
||||
|
||||
int
|
||||
vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
|
||||
{
|
||||
struct file *fp;
|
||||
struct kstat stat;
|
||||
int rc;
|
||||
|
||||
ASSERT(vp);
|
||||
ASSERT(vp->v_file);
|
||||
ASSERT(vap);
|
||||
|
||||
fp = vp->v_file;
|
||||
|
||||
#if defined(HAVE_4ARGS_VFS_GETATTR)
|
||||
rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS,
|
||||
AT_STATX_SYNC_AS_STAT);
|
||||
#elif defined(HAVE_2ARGS_VFS_GETATTR)
|
||||
rc = vfs_getattr(&fp->f_path, &stat);
|
||||
#else
|
||||
rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
|
||||
#endif
|
||||
if (rc)
|
||||
return (-rc);
|
||||
|
||||
vap->va_type = vn_mode_to_vtype(stat.mode);
|
||||
vap->va_mode = stat.mode;
|
||||
vap->va_uid = KUID_TO_SUID(stat.uid);
|
||||
vap->va_gid = KGID_TO_SGID(stat.gid);
|
||||
vap->va_fsid = 0;
|
||||
vap->va_nodeid = stat.ino;
|
||||
vap->va_nlink = stat.nlink;
|
||||
vap->va_size = stat.size;
|
||||
vap->va_blksize = stat.blksize;
|
||||
vap->va_atime = stat.atime;
|
||||
vap->va_mtime = stat.mtime;
|
||||
vap->va_ctime = stat.ctime;
|
||||
vap->va_rdev = stat.rdev;
|
||||
vap->va_nblocks = stat.blocks;
|
||||
|
||||
return (0);
|
||||
}
|
||||
EXPORT_SYMBOL(vn_getattr);
|
||||
|
||||
int
|
||||
vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
|
||||
{
|
||||
int datasync = 0;
|
||||
int error;
|
||||
int fstrans;
|
||||
|
||||
ASSERT(vp);
|
||||
ASSERT(vp->v_file);
|
||||
|
||||
if (flags & FDSYNC)
|
||||
datasync = 1;
|
||||
|
||||
/*
|
||||
* May enter XFS which generates a warning when PF_FSTRANS is set.
|
||||
* To avoid this the flag is cleared over vfs_sync() and then reset.
|
||||
*/
|
||||
fstrans = __spl_pf_fstrans_check();
|
||||
if (fstrans)
|
||||
current->flags &= ~(__SPL_PF_FSTRANS);
|
||||
|
||||
error = -spl_filp_fsync(vp->v_file, datasync);
|
||||
if (fstrans)
|
||||
current->flags |= __SPL_PF_FSTRANS;
|
||||
|
||||
return (error);
|
||||
} /* vn_fsync() */
|
||||
EXPORT_SYMBOL(vn_fsync);
|
||||
|
||||
int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
|
||||
offset_t offset, void *x6, void *x7)
|
||||
{
|
||||
int error = EOPNOTSUPP;
|
||||
#ifdef FALLOC_FL_PUNCH_HOLE
|
||||
int fstrans;
|
||||
#endif
|
||||
|
||||
if (cmd != F_FREESP || bfp->l_whence != SEEK_SET)
|
||||
return (EOPNOTSUPP);
|
||||
|
||||
ASSERT(vp);
|
||||
ASSERT(vp->v_file);
|
||||
ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);
|
||||
|
||||
#ifdef FALLOC_FL_PUNCH_HOLE
|
||||
/*
|
||||
* May enter XFS which generates a warning when PF_FSTRANS is set.
|
||||
* To avoid this the flag is cleared over vfs_sync() and then reset.
|
||||
*/
|
||||
fstrans = __spl_pf_fstrans_check();
|
||||
if (fstrans)
|
||||
current->flags &= ~(__SPL_PF_FSTRANS);
|
||||
|
||||
/*
|
||||
* When supported by the underlying file system preferentially
|
||||
* use the fallocate() callback to preallocate the space.
|
||||
*/
|
||||
error = -spl_filp_fallocate(vp->v_file,
|
||||
FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||
bfp->l_start, bfp->l_len);
|
||||
|
||||
if (fstrans)
|
||||
current->flags |= __SPL_PF_FSTRANS;
|
||||
|
||||
if (error == 0)
|
||||
return (0);
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_INODE_TRUNCATE_RANGE
|
||||
if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
|
||||
vp->v_file->f_dentry->d_inode->i_op &&
|
||||
vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
|
||||
off_t end = bfp->l_start + bfp->l_len;
|
||||
/*
|
||||
* Judging from the code in shmem_truncate_range(),
|
||||
* it seems the kernel expects the end offset to be
|
||||
* inclusive and aligned to the end of a page.
|
||||
*/
|
||||
if (end % PAGE_SIZE != 0) {
|
||||
end &= ~(off_t)(PAGE_SIZE - 1);
|
||||
if (end <= bfp->l_start)
|
||||
return (0);
|
||||
}
|
||||
--end;
|
||||
|
||||
vp->v_file->f_dentry->d_inode->i_op->truncate_range(
|
||||
vp->v_file->f_dentry->d_inode, bfp->l_start, end);
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
EXPORT_SYMBOL(vn_space);
|
||||
|
||||
/* Function must be called while holding the vn_file_lock */
|
||||
static file_t *
|
||||
file_find(int fd, struct task_struct *task)
|
||||
{
|
||||
file_t *fp;
|
||||
|
||||
list_for_each_entry(fp, &vn_file_list, f_list) {
|
||||
if (fd == fp->f_fd && fp->f_task == task) {
|
||||
ASSERT(atomic_read(&fp->f_ref) != 0);
|
||||
return (fp);
|
||||
}
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
} /* file_find() */
|
||||
|
||||
file_t *
|
||||
vn_getf(int fd)
|
||||
{
|
||||
struct kstat stat;
|
||||
struct file *lfp;
|
||||
file_t *fp;
|
||||
vnode_t *vp;
|
||||
int rc = 0;
|
||||
|
||||
if (fd < 0)
|
||||
return (NULL);
|
||||
|
||||
/* Already open just take an extra reference */
|
||||
spin_lock(&vn_file_lock);
|
||||
|
||||
fp = file_find(fd, current);
|
||||
if (fp) {
|
||||
lfp = fget(fd);
|
||||
fput(fp->f_file);
|
||||
/*
|
||||
* areleasef() can cause us to see a stale reference when
|
||||
* userspace has reused a file descriptor before areleasef()
|
||||
* has run. fput() the stale reference and replace it. We
|
||||
* retain the original reference count such that the concurrent
|
||||
* areleasef() will decrement its reference and terminate.
|
||||
*/
|
||||
if (lfp != fp->f_file) {
|
||||
fp->f_file = lfp;
|
||||
fp->f_vnode->v_file = lfp;
|
||||
}
|
||||
atomic_inc(&fp->f_ref);
|
||||
spin_unlock(&vn_file_lock);
|
||||
return (fp);
|
||||
}
|
||||
|
||||
spin_unlock(&vn_file_lock);
|
||||
|
||||
/* File was not yet opened create the object and setup */
|
||||
fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
|
||||
if (fp == NULL)
|
||||
goto out;
|
||||
|
||||
mutex_enter(&fp->f_lock);
|
||||
|
||||
fp->f_fd = fd;
|
||||
fp->f_task = current;
|
||||
fp->f_offset = 0;
|
||||
atomic_inc(&fp->f_ref);
|
||||
|
||||
lfp = fget(fd);
|
||||
if (lfp == NULL)
|
||||
goto out_mutex;
|
||||
|
||||
vp = vn_alloc(KM_SLEEP);
|
||||
if (vp == NULL)
|
||||
goto out_fget;
|
||||
|
||||
#if defined(HAVE_4ARGS_VFS_GETATTR)
|
||||
rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE,
|
||||
AT_STATX_SYNC_AS_STAT);
|
||||
#elif defined(HAVE_2ARGS_VFS_GETATTR)
|
||||
rc = vfs_getattr(&lfp->f_path, &stat);
|
||||
#else
|
||||
rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
|
||||
#endif
|
||||
if (rc)
|
||||
goto out_vnode;
|
||||
|
||||
mutex_enter(&vp->v_lock);
|
||||
vp->v_type = vn_mode_to_vtype(stat.mode);
|
||||
vp->v_file = lfp;
|
||||
mutex_exit(&vp->v_lock);
|
||||
|
||||
fp->f_vnode = vp;
|
||||
fp->f_file = lfp;
|
||||
|
||||
/* Put it on the tracking list */
|
||||
spin_lock(&vn_file_lock);
|
||||
list_add(&fp->f_list, &vn_file_list);
|
||||
spin_unlock(&vn_file_lock);
|
||||
|
||||
mutex_exit(&fp->f_lock);
|
||||
return (fp);
|
||||
|
||||
out_vnode:
|
||||
vn_free(vp);
|
||||
out_fget:
|
||||
fput(lfp);
|
||||
out_mutex:
|
||||
mutex_exit(&fp->f_lock);
|
||||
kmem_cache_free(vn_file_cache, fp);
|
||||
out:
|
||||
return (NULL);
|
||||
} /* getf() */
|
||||
EXPORT_SYMBOL(getf);
|
||||
|
||||
/*
 * Tear down a tracked file entry: drop the struct file reference, free
 * the vnode and return the file_t to vn_file_cache.
 *
 * The callers in this file unlink the entry from vn_file_list first.
 */
static void
releasef_locked(file_t *fp)
{
	ASSERT(fp->f_file);
	ASSERT(fp->f_vnode);

	/* Unlinked from list, no refs, safe to free outside mutex */
	fput(fp->f_file);
	vn_free(fp->f_vnode);
	kmem_cache_free(vn_file_cache, fp);
}
|
||||
|
||||
void
|
||||
vn_releasef(int fd)
|
||||
{
|
||||
areleasef(fd, P_FINFO(current));
|
||||
}
|
||||
EXPORT_SYMBOL(releasef);
|
||||
|
||||
void
|
||||
vn_areleasef(int fd, uf_info_t *fip)
|
||||
{
|
||||
file_t *fp;
|
||||
struct task_struct *task = (struct task_struct *)fip;
|
||||
|
||||
if (fd < 0)
|
||||
return;
|
||||
|
||||
spin_lock(&vn_file_lock);
|
||||
fp = file_find(fd, task);
|
||||
if (fp) {
|
||||
atomic_dec(&fp->f_ref);
|
||||
if (atomic_read(&fp->f_ref) > 0) {
|
||||
spin_unlock(&vn_file_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
list_del(&fp->f_list);
|
||||
releasef_locked(fp);
|
||||
}
|
||||
spin_unlock(&vn_file_lock);
|
||||
} /* releasef() */
|
||||
EXPORT_SYMBOL(areleasef);
|
||||
|
||||
static int
|
||||
vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
|
||||
{
|
||||
struct vnode *vp = buf;
|
||||
|
||||
mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
return (0);
|
||||
} /* vn_cache_constructor() */
|
||||
|
||||
static void
|
||||
vn_cache_destructor(void *buf, void *cdrarg)
|
||||
{
|
||||
struct vnode *vp = buf;
|
||||
|
||||
mutex_destroy(&vp->v_lock);
|
||||
} /* vn_cache_destructor() */
|
||||
|
||||
static int
|
||||
vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
|
||||
{
|
||||
file_t *fp = buf;
|
||||
|
||||
atomic_set(&fp->f_ref, 0);
|
||||
mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
INIT_LIST_HEAD(&fp->f_list);
|
||||
|
||||
return (0);
|
||||
} /* vn_file_cache_constructor() */
|
||||
|
||||
static void
|
||||
vn_file_cache_destructor(void *buf, void *cdrarg)
|
||||
{
|
||||
file_t *fp = buf;
|
||||
|
||||
mutex_destroy(&fp->f_lock);
|
||||
} /* vn_file_cache_destructor() */
|
||||
|
||||
int
|
||||
spl_vn_init(void)
|
||||
{
|
||||
spin_lock_init(&vn_file_lock);
|
||||
|
||||
vn_cache = kmem_cache_create("spl_vn_cache",
|
||||
sizeof (struct vnode), 64, vn_cache_constructor,
|
||||
vn_cache_destructor, NULL, NULL, NULL, 0);
|
||||
|
||||
vn_file_cache = kmem_cache_create("spl_vn_file_cache",
|
||||
sizeof (file_t), 64, vn_file_cache_constructor,
|
||||
vn_file_cache_destructor, NULL, NULL, NULL, 0);
|
||||
|
||||
return (0);
|
||||
} /* spl_vn_init() */
|
||||
|
||||
void
|
||||
spl_vn_fini(void)
|
||||
{
|
||||
file_t *fp, *next_fp;
|
||||
int leaked = 0;
|
||||
|
||||
spin_lock(&vn_file_lock);
|
||||
|
||||
list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) {
|
||||
list_del(&fp->f_list);
|
||||
releasef_locked(fp);
|
||||
leaked++;
|
||||
}
|
||||
|
||||
spin_unlock(&vn_file_lock);
|
||||
|
||||
if (leaked > 0)
|
||||
printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);
|
||||
|
||||
kmem_cache_destroy(vn_file_cache);
|
||||
kmem_cache_destroy(vn_cache);
|
||||
} /* spl_vn_fini() */
|
||||
@@ -0,0 +1,513 @@
|
||||
/*
|
||||
* Copyright (c) 2008-2010 Sun Microsystems, Inc.
|
||||
* Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Solaris Porting Layer (SPL) XDR Implementation.
|
||||
*/
|
||||
|
||||
#include <linux/string.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <rpc/xdr.h>
|
||||
|
||||
/*
|
||||
* SPL's XDR mem implementation.
|
||||
*
|
||||
* This is used by libnvpair to serialize/deserialize the name-value pair data
|
||||
* structures into byte arrays in a well-defined and portable manner.
|
||||
*
|
||||
* These data structures are used by the DMU/ZFS to flexibly manipulate various
|
||||
* information in memory and later serialize it/deserialize it to disk.
|
||||
* Examples of usages include the pool configuration, lists of pool and dataset
|
||||
* properties, etc.
|
||||
*
|
||||
* Reference documentation for the XDR representation and XDR operations can be
|
||||
* found in RFC 1832 and xdr(3), respectively.
|
||||
*
|
||||
* === Implementation shortcomings ===
|
||||
*
|
||||
* It is assumed that the following C types have the following sizes:
|
||||
*
|
||||
* char/unsigned char: 1 byte
|
||||
* short/unsigned short: 2 bytes
|
||||
* int/unsigned int: 4 bytes
|
||||
* longlong_t/u_longlong_t: 8 bytes
|
||||
*
|
||||
* The C standard allows these types to be larger (and in the case of ints,
|
||||
* shorter), so if that is the case on some compiler/architecture, the build
|
||||
* will fail (on purpose).
|
||||
*
|
||||
* If someone wants to fix the code to work properly on such environments, then:
|
||||
*
|
||||
* 1) Preconditions should be added to xdrmem_enc functions to make sure the
|
||||
* caller doesn't pass arguments which exceed the expected range.
|
||||
* 2) Functions which take signed integers should be changed to properly do
|
||||
* sign extension.
|
||||
* 3) For ints with less than 32 bits, well.. I suspect you'll have bigger
|
||||
* problems than this implementation.
|
||||
*
|
||||
* It is also assumed that:
|
||||
*
|
||||
* 1) Chars have 8 bits.
|
||||
* 2) We can always do 32-bit-aligned int memory accesses and byte-aligned
|
||||
* memcpy, memset and memcmp.
|
||||
* 3) Arrays passed to xdr_array() are packed and the compiler/architecture
|
||||
* supports element-sized-aligned memory accesses.
|
||||
* 4) Negative integers are natively stored in two's complement binary
|
||||
* representation.
|
||||
*
|
||||
* No checks are done for the 4 assumptions above, though.
|
||||
*
|
||||
* === Caller expectations ===
|
||||
*
|
||||
* Existing documentation does not describe the semantics of XDR operations very
|
||||
* well. Therefore, some assumptions about failure semantics will be made and
|
||||
* will be described below:
|
||||
*
|
||||
* 1) If any encoding operation fails (e.g., due to lack of buffer space), the
|
||||
* stream should be considered valid only up to the encoding operation
|
||||
* previous to the one that first failed. However, the stream size as returned
|
||||
* by xdr_control() cannot be considered to be strictly correct (it may be
|
||||
* bigger).
|
||||
*
|
||||
* Putting it another way, if there is an encoding failure it's undefined
|
||||
* whether anything is added to the stream in that operation and therefore
|
||||
* neither xdr_control() nor future encoding operations on the same stream can
|
||||
* be relied upon to produce correct results.
|
||||
*
|
||||
* 2) If a decoding operation fails, it's undefined whether anything will be
|
||||
* decoded into passed buffers/pointers during that operation, or what the
|
||||
* values on those buffers will look like.
|
||||
*
|
||||
* Future decoding operations on the same stream will also have similar
|
||||
* undefined behavior.
|
||||
*
|
||||
* 3) When the first decoding operation fails it is OK to trust the results of
|
||||
* previous decoding operations on the same stream, as long as the caller
|
||||
* expects a failure to be possible (e.g. due to end-of-stream).
|
||||
*
|
||||
* However, this is highly discouraged because the caller should know the
|
||||
* stream size and should be coded to expect any decoding failure to be data
|
||||
* corruption due to hardware, accidental or even malicious causes, which should
|
||||
* be handled gracefully in all cases.
|
||||
*
|
||||
* In very rare situations where there are strong reasons to believe the data
|
||||
* can be trusted to be valid and non-tampered with, then the caller may assume
|
||||
* a decoding failure to be a bug (e.g. due to mismatched data types) and may
|
||||
* fail non-gracefully.
|
||||
*
|
||||
* 4) Non-zero padding bytes will cause the decoding operation to fail.
|
||||
*
|
||||
* 5) Zero bytes on string types will also cause the decoding operation to fail.
|
||||
*
|
||||
* 6) It is assumed that either the pointer to the stream buffer given by the
|
||||
* caller is 32-bit aligned or the architecture supports non-32-bit-aligned int
|
||||
* memory accesses.
|
||||
*
|
||||
* 7) The stream buffer and encoding/decoding buffers/ptrs should not overlap.
|
||||
*
|
||||
* 8) If a caller passes pointers to non-kernel memory (e.g., pointers to user
|
||||
* space or MMIO space), the computer may explode.
|
||||
*/
|
||||
|
||||
static struct xdr_ops xdrmem_encode_ops;
|
||||
static struct xdr_ops xdrmem_decode_ops;
|
||||
|
||||
void
|
||||
xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
|
||||
const enum xdr_op op)
|
||||
{
|
||||
switch (op) {
|
||||
case XDR_ENCODE:
|
||||
xdrs->x_ops = &xdrmem_encode_ops;
|
||||
break;
|
||||
case XDR_DECODE:
|
||||
xdrs->x_ops = &xdrmem_decode_ops;
|
||||
break;
|
||||
default:
|
||||
xdrs->x_ops = NULL; /* Let the caller know we failed */
|
||||
return;
|
||||
}
|
||||
|
||||
xdrs->x_op = op;
|
||||
xdrs->x_addr = addr;
|
||||
xdrs->x_addr_end = addr + size;
|
||||
|
||||
if (xdrs->x_addr_end < xdrs->x_addr) {
|
||||
xdrs->x_ops = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(xdrmem_create);
|
||||
|
||||
static bool_t
|
||||
xdrmem_control(XDR *xdrs, int req, void *info)
|
||||
{
|
||||
struct xdr_bytesrec *rec = (struct xdr_bytesrec *)info;
|
||||
|
||||
if (req != XDR_GET_BYTES_AVAIL)
|
||||
return (FALSE);
|
||||
|
||||
rec->xc_is_last_record = TRUE; /* always TRUE in xdrmem streams */
|
||||
rec->xc_num_avail = xdrs->x_addr_end - xdrs->x_addr;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_enc_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
|
||||
{
|
||||
uint_t size = roundup(cnt, 4);
|
||||
uint_t pad;
|
||||
|
||||
if (size < cnt)
|
||||
return (FALSE); /* Integer overflow */
|
||||
|
||||
if (xdrs->x_addr > xdrs->x_addr_end)
|
||||
return (FALSE);
|
||||
|
||||
if (xdrs->x_addr_end - xdrs->x_addr < size)
|
||||
return (FALSE);
|
||||
|
||||
memcpy(xdrs->x_addr, cp, cnt);
|
||||
|
||||
xdrs->x_addr += cnt;
|
||||
|
||||
pad = size - cnt;
|
||||
if (pad > 0) {
|
||||
memset(xdrs->x_addr, 0, pad);
|
||||
xdrs->x_addr += pad;
|
||||
}
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_dec_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
|
||||
{
|
||||
static uint32_t zero = 0;
|
||||
uint_t size = roundup(cnt, 4);
|
||||
uint_t pad;
|
||||
|
||||
if (size < cnt)
|
||||
return (FALSE); /* Integer overflow */
|
||||
|
||||
if (xdrs->x_addr > xdrs->x_addr_end)
|
||||
return (FALSE);
|
||||
|
||||
if (xdrs->x_addr_end - xdrs->x_addr < size)
|
||||
return (FALSE);
|
||||
|
||||
memcpy(cp, xdrs->x_addr, cnt);
|
||||
xdrs->x_addr += cnt;
|
||||
|
||||
pad = size - cnt;
|
||||
if (pad > 0) {
|
||||
/* An inverted memchr() would be useful here... */
|
||||
if (memcmp(&zero, xdrs->x_addr, pad) != 0)
|
||||
return (FALSE);
|
||||
|
||||
xdrs->x_addr += pad;
|
||||
}
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_enc_uint32(XDR *xdrs, uint32_t val)
|
||||
{
|
||||
if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
|
||||
return (FALSE);
|
||||
|
||||
*((uint32_t *)xdrs->x_addr) = cpu_to_be32(val);
|
||||
|
||||
xdrs->x_addr += sizeof (uint32_t);
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_dec_uint32(XDR *xdrs, uint32_t *val)
|
||||
{
|
||||
if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
|
||||
return (FALSE);
|
||||
|
||||
*val = be32_to_cpu(*((uint32_t *)xdrs->x_addr));
|
||||
|
||||
xdrs->x_addr += sizeof (uint32_t);
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_enc_char(XDR *xdrs, char *cp)
|
||||
{
|
||||
uint32_t val;
|
||||
|
||||
BUILD_BUG_ON(sizeof (char) != 1);
|
||||
val = *((unsigned char *) cp);
|
||||
|
||||
return (xdrmem_enc_uint32(xdrs, val));
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_dec_char(XDR *xdrs, char *cp)
|
||||
{
|
||||
uint32_t val;
|
||||
|
||||
BUILD_BUG_ON(sizeof (char) != 1);
|
||||
|
||||
if (!xdrmem_dec_uint32(xdrs, &val))
|
||||
return (FALSE);
|
||||
|
||||
/*
|
||||
* If any of the 3 other bytes are non-zero then val will be greater
|
||||
* than 0xff and we fail because according to the RFC, this block does
|
||||
* not have a char encoded in it.
|
||||
*/
|
||||
if (val > 0xff)
|
||||
return (FALSE);
|
||||
|
||||
*((unsigned char *) cp) = val;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_enc_ushort(XDR *xdrs, unsigned short *usp)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof (unsigned short) != 2);
|
||||
|
||||
return (xdrmem_enc_uint32(xdrs, *usp));
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_dec_ushort(XDR *xdrs, unsigned short *usp)
|
||||
{
|
||||
uint32_t val;
|
||||
|
||||
BUILD_BUG_ON(sizeof (unsigned short) != 2);
|
||||
|
||||
if (!xdrmem_dec_uint32(xdrs, &val))
|
||||
return (FALSE);
|
||||
|
||||
/*
|
||||
* Short ints are not in the RFC, but we assume similar logic as in
|
||||
* xdrmem_dec_char().
|
||||
*/
|
||||
if (val > 0xffff)
|
||||
return (FALSE);
|
||||
|
||||
*usp = val;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_enc_uint(XDR *xdrs, unsigned *up)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof (unsigned) != 4);
|
||||
|
||||
return (xdrmem_enc_uint32(xdrs, *up));
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_dec_uint(XDR *xdrs, unsigned *up)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof (unsigned) != 4);
|
||||
|
||||
return (xdrmem_dec_uint32(xdrs, (uint32_t *)up));
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_enc_ulonglong(XDR *xdrs, u_longlong_t *ullp)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
|
||||
|
||||
if (!xdrmem_enc_uint32(xdrs, *ullp >> 32))
|
||||
return (FALSE);
|
||||
|
||||
return (xdrmem_enc_uint32(xdrs, *ullp & 0xffffffff));
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdrmem_dec_ulonglong(XDR *xdrs, u_longlong_t *ullp)
|
||||
{
|
||||
uint32_t low, high;
|
||||
|
||||
BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
|
||||
|
||||
if (!xdrmem_dec_uint32(xdrs, &high))
|
||||
return (FALSE);
|
||||
if (!xdrmem_dec_uint32(xdrs, &low))
|
||||
return (FALSE);
|
||||
|
||||
*ullp = ((u_longlong_t)high << 32) | low;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdr_enc_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
|
||||
const uint_t elsize, const xdrproc_t elproc)
|
||||
{
|
||||
uint_t i;
|
||||
caddr_t addr = *arrp;
|
||||
|
||||
if (*sizep > maxsize || *sizep > UINT_MAX / elsize)
|
||||
return (FALSE);
|
||||
|
||||
if (!xdrmem_enc_uint(xdrs, sizep))
|
||||
return (FALSE);
|
||||
|
||||
for (i = 0; i < *sizep; i++) {
|
||||
if (!elproc(xdrs, addr))
|
||||
return (FALSE);
|
||||
addr += elsize;
|
||||
}
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdr_dec_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
|
||||
const uint_t elsize, const xdrproc_t elproc)
|
||||
{
|
||||
uint_t i, size;
|
||||
bool_t alloc = FALSE;
|
||||
caddr_t addr;
|
||||
|
||||
if (!xdrmem_dec_uint(xdrs, sizep))
|
||||
return (FALSE);
|
||||
|
||||
size = *sizep;
|
||||
|
||||
if (size > maxsize || size > UINT_MAX / elsize)
|
||||
return (FALSE);
|
||||
|
||||
/*
|
||||
* The Solaris man page says: "If *arrp is NULL when decoding,
|
||||
* xdr_array() allocates memory and *arrp points to it".
|
||||
*/
|
||||
if (*arrp == NULL) {
|
||||
BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
|
||||
|
||||
*arrp = kmem_alloc(size * elsize, KM_NOSLEEP);
|
||||
if (*arrp == NULL)
|
||||
return (FALSE);
|
||||
|
||||
alloc = TRUE;
|
||||
}
|
||||
|
||||
addr = *arrp;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
if (!elproc(xdrs, addr)) {
|
||||
if (alloc)
|
||||
kmem_free(*arrp, size * elsize);
|
||||
return (FALSE);
|
||||
}
|
||||
addr += elsize;
|
||||
}
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdr_enc_string(XDR *xdrs, char **sp, const uint_t maxsize)
|
||||
{
|
||||
size_t slen = strlen(*sp);
|
||||
uint_t len;
|
||||
|
||||
if (slen > maxsize)
|
||||
return (FALSE);
|
||||
|
||||
len = slen;
|
||||
|
||||
if (!xdrmem_enc_uint(xdrs, &len))
|
||||
return (FALSE);
|
||||
|
||||
return (xdrmem_enc_bytes(xdrs, *sp, len));
|
||||
}
|
||||
|
||||
static bool_t
|
||||
xdr_dec_string(XDR *xdrs, char **sp, const uint_t maxsize)
|
||||
{
|
||||
uint_t size;
|
||||
bool_t alloc = FALSE;
|
||||
|
||||
if (!xdrmem_dec_uint(xdrs, &size))
|
||||
return (FALSE);
|
||||
|
||||
if (size > maxsize || size > UINT_MAX - 1)
|
||||
return (FALSE);
|
||||
|
||||
/*
|
||||
* Solaris man page: "If *sp is NULL when decoding, xdr_string()
|
||||
* allocates memory and *sp points to it".
|
||||
*/
|
||||
if (*sp == NULL) {
|
||||
BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
|
||||
|
||||
*sp = kmem_alloc(size + 1, KM_NOSLEEP);
|
||||
if (*sp == NULL)
|
||||
return (FALSE);
|
||||
|
||||
alloc = TRUE;
|
||||
}
|
||||
|
||||
if (!xdrmem_dec_bytes(xdrs, *sp, size))
|
||||
goto fail;
|
||||
|
||||
if (memchr(*sp, 0, size) != NULL)
|
||||
goto fail;
|
||||
|
||||
(*sp)[size] = '\0';
|
||||
|
||||
return (TRUE);
|
||||
|
||||
fail:
|
||||
if (alloc)
|
||||
kmem_free(*sp, size + 1);
|
||||
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
static struct xdr_ops xdrmem_encode_ops = {
|
||||
.xdr_control = xdrmem_control,
|
||||
.xdr_char = xdrmem_enc_char,
|
||||
.xdr_u_short = xdrmem_enc_ushort,
|
||||
.xdr_u_int = xdrmem_enc_uint,
|
||||
.xdr_u_longlong_t = xdrmem_enc_ulonglong,
|
||||
.xdr_opaque = xdrmem_enc_bytes,
|
||||
.xdr_string = xdr_enc_string,
|
||||
.xdr_array = xdr_enc_array
|
||||
};
|
||||
|
||||
static struct xdr_ops xdrmem_decode_ops = {
|
||||
.xdr_control = xdrmem_control,
|
||||
.xdr_char = xdrmem_dec_char,
|
||||
.xdr_u_short = xdrmem_dec_ushort,
|
||||
.xdr_u_int = xdrmem_dec_uint,
|
||||
.xdr_u_longlong_t = xdrmem_dec_ulonglong,
|
||||
.xdr_opaque = xdrmem_dec_bytes,
|
||||
.xdr_string = xdr_dec_string,
|
||||
.xdr_array = xdr_dec_array
|
||||
};
|
||||
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
|
||||
* Copyright (C) 2007 The Regents of the University of California.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* UCRL-CODE-235197
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
* For details, see <http://zfsonlinux.org/>.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
* z_compress_level/z_uncompress are nearly identical copies of the
|
||||
* compress2/uncompress functions provided by the official zlib package
|
||||
* available at http://zlib.net/. The only changes made were to slightly
|
||||
* adapt the functions called to match the linux kernel implementation
|
||||
* of zlib. The full zlib license follows:
|
||||
*
|
||||
* zlib.h -- interface of the 'zlib' general purpose compression library
|
||||
* version 1.2.5, April 19th, 2010
|
||||
*
|
||||
* Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*
|
||||
* Jean-loup Gailly
|
||||
* Mark Adler
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/zmod.h>
|
||||
|
||||
static spl_kmem_cache_t *zlib_workspace_cache;
|
||||
|
||||
/*
|
||||
* A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
|
||||
* and vfree for every call. Using a kmem_cache also has the advantage
|
||||
* that it improves the odds that the memory used will be local to this cpu.
|
||||
* To further improve things it might be wise to create a dedicated per-cpu
|
||||
* workspace for use. This would take some additional care because we then
|
||||
* must disable preemption around the critical section, and verify that
|
||||
* zlib_deflate* and zlib_inflate* never internally call schedule().
|
||||
*/
|
||||
static void *
|
||||
zlib_workspace_alloc(int flags)
|
||||
{
|
||||
return (kmem_cache_alloc(zlib_workspace_cache, flags & ~(__GFP_FS)));
|
||||
}
|
||||
|
||||
static void
|
||||
zlib_workspace_free(void *workspace)
|
||||
{
|
||||
kmem_cache_free(zlib_workspace_cache, workspace);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compresses the source buffer into the destination buffer. The level
|
||||
* parameter has the same meaning as in deflateInit. sourceLen is the byte
|
||||
* length of the source buffer. Upon entry, destLen is the total size of the
|
||||
* destination buffer, which must be at least 0.1% larger than sourceLen plus
|
||||
* 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
|
||||
*
|
||||
* compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
|
||||
* memory, Z_BUF_ERROR if there was not enough room in the output buffer,
|
||||
* Z_STREAM_ERROR if the level parameter is invalid.
|
||||
*/
|
||||
int
|
||||
z_compress_level(void *dest, size_t *destLen, const void *source,
|
||||
size_t sourceLen, int level)
|
||||
{
|
||||
z_stream stream;
|
||||
int err;
|
||||
|
||||
stream.next_in = (Byte *)source;
|
||||
stream.avail_in = (uInt)sourceLen;
|
||||
stream.next_out = dest;
|
||||
stream.avail_out = (uInt)*destLen;
|
||||
|
||||
if ((size_t)stream.avail_out != *destLen)
|
||||
return (Z_BUF_ERROR);
|
||||
|
||||
stream.workspace = zlib_workspace_alloc(KM_SLEEP);
|
||||
if (!stream.workspace)
|
||||
return (Z_MEM_ERROR);
|
||||
|
||||
err = zlib_deflateInit(&stream, level);
|
||||
if (err != Z_OK) {
|
||||
zlib_workspace_free(stream.workspace);
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = zlib_deflate(&stream, Z_FINISH);
|
||||
if (err != Z_STREAM_END) {
|
||||
zlib_deflateEnd(&stream);
|
||||
zlib_workspace_free(stream.workspace);
|
||||
return (err == Z_OK ? Z_BUF_ERROR : err);
|
||||
}
|
||||
*destLen = stream.total_out;
|
||||
|
||||
err = zlib_deflateEnd(&stream);
|
||||
zlib_workspace_free(stream.workspace);
|
||||
|
||||
return (err);
|
||||
}
|
||||
EXPORT_SYMBOL(z_compress_level);
|
||||
|
||||
/*
|
||||
* Decompresses the source buffer into the destination buffer. sourceLen is
|
||||
* the byte length of the source buffer. Upon entry, destLen is the total
|
||||
* size of the destination buffer, which must be large enough to hold the
|
||||
* entire uncompressed data. (The size of the uncompressed data must have
|
||||
* been saved previously by the compressor and transmitted to the decompressor
|
||||
* by some mechanism outside the scope of this compression library.)
|
||||
* Upon exit, destLen is the actual size of the uncompressed buffer.
|
||||
* This function can be used to decompress a whole file at once if the
|
||||
* input file is mmap'ed.
|
||||
*
|
||||
* uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
|
||||
* enough memory, Z_BUF_ERROR if there was not enough room in the output
|
||||
* buffer, or Z_DATA_ERROR if the input data was corrupted.
|
||||
*/
|
||||
int
|
||||
z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
|
||||
{
|
||||
z_stream stream;
|
||||
int err;
|
||||
|
||||
stream.next_in = (Byte *)source;
|
||||
stream.avail_in = (uInt)sourceLen;
|
||||
stream.next_out = dest;
|
||||
stream.avail_out = (uInt)*destLen;
|
||||
|
||||
if ((size_t)stream.avail_out != *destLen)
|
||||
return (Z_BUF_ERROR);
|
||||
|
||||
stream.workspace = zlib_workspace_alloc(KM_SLEEP);
|
||||
if (!stream.workspace)
|
||||
return (Z_MEM_ERROR);
|
||||
|
||||
err = zlib_inflateInit(&stream);
|
||||
if (err != Z_OK) {
|
||||
zlib_workspace_free(stream.workspace);
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = zlib_inflate(&stream, Z_FINISH);
|
||||
if (err != Z_STREAM_END) {
|
||||
zlib_inflateEnd(&stream);
|
||||
zlib_workspace_free(stream.workspace);
|
||||
|
||||
if (err == Z_NEED_DICT ||
|
||||
(err == Z_BUF_ERROR && stream.avail_in == 0))
|
||||
return (Z_DATA_ERROR);
|
||||
|
||||
return (err);
|
||||
}
|
||||
*destLen = stream.total_out;
|
||||
|
||||
err = zlib_inflateEnd(&stream);
|
||||
zlib_workspace_free(stream.workspace);
|
||||
|
||||
return (err);
|
||||
}
|
||||
EXPORT_SYMBOL(z_uncompress);
|
||||
|
||||
int
|
||||
spl_zlib_init(void)
|
||||
{
|
||||
int size;
|
||||
|
||||
size = MAX(spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
|
||||
zlib_inflate_workspacesize());
|
||||
|
||||
zlib_workspace_cache = kmem_cache_create(
|
||||
"spl_zlib_workspace_cache",
|
||||
size, 0, NULL, NULL, NULL, NULL, NULL,
|
||||
KMC_VMEM);
|
||||
if (!zlib_workspace_cache)
|
||||
return (1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
spl_zlib_fini(void)
|
||||
{
|
||||
kmem_cache_destroy(zlib_workspace_cache);
|
||||
zlib_workspace_cache = NULL;
|
||||
}
|
||||
Reference in New Issue
Block a user