mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-03 23:09:35 +03:00
d2a12f9e2a
The ZAP code was written before we allowed c99 in the Solaris kernel. We should change it to take advantage of being able to declare variables where they are first used. This reduces variable scope and means less scrolling to find the type of variables. Authored by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Steve Gonczi <steve.gonczi@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed-by: George Melikov <mail@gmelikov.ru> Approved by: Dan McDonald <danmcd@joyent.com> Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> OpenZFS-issue: https://illumos.org/issues/9328 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/76ead05 Closes #7578
853 lines
23 KiB
C
853 lines
23 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
|
* Copyright 2017 Nexenta Systems, Inc.
|
|
*/
|
|
|
|
/*
|
|
* The 512-byte leaf is broken into 32 16-byte chunks.
|
|
* chunk number n means l_chunk[n], even though the header precedes it.
|
|
* the names are stored null-terminated.
|
|
*/
|
|
|
|
#include <sys/zio.h>
|
|
#include <sys/spa.h>
|
|
#include <sys/dmu.h>
|
|
#include <sys/zfs_context.h>
|
|
#include <sys/fs/zfs.h>
|
|
#include <sys/zap.h>
|
|
#include <sys/zap_impl.h>
|
|
#include <sys/zap_leaf.h>
|
|
#include <sys/arc.h>
|
|
|
|
static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
|
|
|
|
#define CHAIN_END 0xffff /* end of the chunk chain */
|
|
|
|
/* half the (current) minimum block size */
|
|
#define MAX_ARRAY_BYTES (8<<10)
|
|
|
|
#define LEAF_HASH(l, h) \
|
|
((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
|
|
((h) >> \
|
|
(64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
|
|
|
|
#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
|
|
|
|
extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
|
|
|
|
static void
|
|
zap_memset(void *a, int c, size_t n)
|
|
{
|
|
char *cp = a;
|
|
char *cpend = cp + n;
|
|
|
|
while (cp < cpend)
|
|
*cp++ = c;
|
|
}
|
|
|
|
static void
|
|
stv(int len, void *addr, uint64_t value)
|
|
{
|
|
switch (len) {
|
|
case 1:
|
|
*(uint8_t *)addr = value;
|
|
return;
|
|
case 2:
|
|
*(uint16_t *)addr = value;
|
|
return;
|
|
case 4:
|
|
*(uint32_t *)addr = value;
|
|
return;
|
|
case 8:
|
|
*(uint64_t *)addr = value;
|
|
return;
|
|
default:
|
|
cmn_err(CE_PANIC, "bad int len %d", len);
|
|
}
|
|
}
|
|
|
|
static uint64_t
|
|
ldv(int len, const void *addr)
|
|
{
|
|
switch (len) {
|
|
case 1:
|
|
return (*(uint8_t *)addr);
|
|
case 2:
|
|
return (*(uint16_t *)addr);
|
|
case 4:
|
|
return (*(uint32_t *)addr);
|
|
case 8:
|
|
return (*(uint64_t *)addr);
|
|
default:
|
|
cmn_err(CE_PANIC, "bad int len %d", len);
|
|
}
|
|
return (0xFEEDFACEDEADBEEFULL);
|
|
}
|
|
|
|
void
|
|
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
|
|
{
|
|
zap_leaf_t l;
|
|
dmu_buf_t l_dbuf;
|
|
|
|
l_dbuf.db_data = buf;
|
|
l.l_bs = highbit64(size) - 1;
|
|
l.l_dbuf = &l_dbuf;
|
|
|
|
buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type);
|
|
buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix);
|
|
buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic);
|
|
buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree);
|
|
buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries);
|
|
buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len);
|
|
buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);
|
|
|
|
for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
|
|
buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
|
|
|
|
for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
|
|
zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
|
|
struct zap_leaf_entry *le;
|
|
|
|
switch (lc->l_free.lf_type) {
|
|
case ZAP_CHUNK_ENTRY:
|
|
le = &lc->l_entry;
|
|
|
|
le->le_type = BSWAP_8(le->le_type);
|
|
le->le_value_intlen = BSWAP_8(le->le_value_intlen);
|
|
le->le_next = BSWAP_16(le->le_next);
|
|
le->le_name_chunk = BSWAP_16(le->le_name_chunk);
|
|
le->le_name_numints = BSWAP_16(le->le_name_numints);
|
|
le->le_value_chunk = BSWAP_16(le->le_value_chunk);
|
|
le->le_value_numints = BSWAP_16(le->le_value_numints);
|
|
le->le_cd = BSWAP_32(le->le_cd);
|
|
le->le_hash = BSWAP_64(le->le_hash);
|
|
break;
|
|
case ZAP_CHUNK_FREE:
|
|
lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type);
|
|
lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next);
|
|
break;
|
|
case ZAP_CHUNK_ARRAY:
|
|
lc->l_array.la_type = BSWAP_8(lc->l_array.la_type);
|
|
lc->l_array.la_next = BSWAP_16(lc->l_array.la_next);
|
|
/* la_array doesn't need swapping */
|
|
break;
|
|
default:
|
|
cmn_err(CE_PANIC, "bad leaf type %d",
|
|
lc->l_free.lf_type);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
zap_leaf_init(zap_leaf_t *l, boolean_t sort)
|
|
{
|
|
l->l_bs = highbit64(l->l_dbuf->db_size) - 1;
|
|
zap_memset(&zap_leaf_phys(l)->l_hdr, 0,
|
|
sizeof (struct zap_leaf_header));
|
|
zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
|
|
2*ZAP_LEAF_HASH_NUMENTRIES(l));
|
|
for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
|
|
ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
|
|
ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
|
|
}
|
|
ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
|
|
zap_leaf_phys(l)->l_hdr.lh_block_type = ZBT_LEAF;
|
|
zap_leaf_phys(l)->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
|
|
zap_leaf_phys(l)->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
|
|
if (sort)
|
|
zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
|
|
}
|
|
|
|
/*
|
|
* Routines which manipulate leaf chunks (l_chunk[]).
|
|
*/
|
|
|
|
static uint16_t
|
|
zap_leaf_chunk_alloc(zap_leaf_t *l)
|
|
{
|
|
ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0);
|
|
|
|
int chunk = zap_leaf_phys(l)->l_hdr.lh_freelist;
|
|
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
|
|
|
|
zap_leaf_phys(l)->l_hdr.lh_freelist =
|
|
ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
|
|
|
|
zap_leaf_phys(l)->l_hdr.lh_nfree--;
|
|
|
|
return (chunk);
|
|
}
|
|
|
|
static void
|
|
zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
|
|
{
|
|
struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
|
|
ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
|
|
|
|
zlf->lf_type = ZAP_CHUNK_FREE;
|
|
zlf->lf_next = zap_leaf_phys(l)->l_hdr.lh_freelist;
|
|
bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
|
|
zap_leaf_phys(l)->l_hdr.lh_freelist = chunk;
|
|
|
|
zap_leaf_phys(l)->l_hdr.lh_nfree++;
|
|
}
|
|
|
|
/*
|
|
* Routines which manipulate leaf arrays (zap_leaf_array type chunks).
|
|
*/
|
|
|
|
static uint16_t
|
|
zap_leaf_array_create(zap_leaf_t *l, const char *buf,
|
|
int integer_size, int num_integers)
|
|
{
|
|
uint16_t chunk_head;
|
|
uint16_t *chunkp = &chunk_head;
|
|
int byten = 0;
|
|
uint64_t value = 0;
|
|
int shift = (integer_size - 1) * 8;
|
|
int len = num_integers;
|
|
|
|
ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);
|
|
|
|
while (len > 0) {
|
|
uint16_t chunk = zap_leaf_chunk_alloc(l);
|
|
struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
|
|
|
|
la->la_type = ZAP_CHUNK_ARRAY;
|
|
for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
|
|
if (byten == 0)
|
|
value = ldv(integer_size, buf);
|
|
la->la_array[i] = value >> shift;
|
|
value <<= 8;
|
|
if (++byten == integer_size) {
|
|
byten = 0;
|
|
buf += integer_size;
|
|
if (--len == 0)
|
|
break;
|
|
}
|
|
}
|
|
|
|
*chunkp = chunk;
|
|
chunkp = &la->la_next;
|
|
}
|
|
*chunkp = CHAIN_END;
|
|
|
|
return (chunk_head);
|
|
}
|
|
|
|
static void
|
|
zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
|
|
{
|
|
uint16_t chunk = *chunkp;
|
|
|
|
*chunkp = CHAIN_END;
|
|
|
|
while (chunk != CHAIN_END) {
|
|
int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
|
|
ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
|
|
ZAP_CHUNK_ARRAY);
|
|
zap_leaf_chunk_free(l, chunk);
|
|
chunk = nextchunk;
|
|
}
|
|
}
|
|
|
|
/* array_len and buf_len are in integers, not bytes */
|
|
static void
|
|
zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
|
|
int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
|
|
void *buf)
|
|
{
|
|
int len = MIN(array_len, buf_len);
|
|
int byten = 0;
|
|
uint64_t value = 0;
|
|
char *p = buf;
|
|
|
|
ASSERT3U(array_int_len, <=, buf_int_len);
|
|
|
|
/* Fast path for one 8-byte integer */
|
|
if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
|
|
struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
|
|
uint8_t *ip = la->la_array;
|
|
uint64_t *buf64 = buf;
|
|
|
|
*buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
|
|
(uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
|
|
(uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
|
|
(uint64_t)ip[6] << 8 | (uint64_t)ip[7];
|
|
return;
|
|
}
|
|
|
|
/* Fast path for an array of 1-byte integers (eg. the entry name) */
|
|
if (array_int_len == 1 && buf_int_len == 1 &&
|
|
buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
|
|
while (chunk != CHAIN_END) {
|
|
struct zap_leaf_array *la =
|
|
&ZAP_LEAF_CHUNK(l, chunk).l_array;
|
|
bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES);
|
|
p += ZAP_LEAF_ARRAY_BYTES;
|
|
chunk = la->la_next;
|
|
}
|
|
return;
|
|
}
|
|
|
|
while (len > 0) {
|
|
struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
|
|
|
|
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
|
|
value = (value << 8) | la->la_array[i];
|
|
byten++;
|
|
if (byten == array_int_len) {
|
|
stv(buf_int_len, p, value);
|
|
byten = 0;
|
|
len--;
|
|
if (len == 0)
|
|
return;
|
|
p += buf_int_len;
|
|
}
|
|
}
|
|
chunk = la->la_next;
|
|
}
|
|
}
|
|
|
|
static boolean_t
|
|
zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
|
|
int chunk, int array_numints)
|
|
{
|
|
int bseen = 0;
|
|
|
|
if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) {
|
|
uint64_t *thiskey =
|
|
kmem_alloc(array_numints * sizeof (*thiskey), KM_SLEEP);
|
|
ASSERT(zn->zn_key_intlen == sizeof (*thiskey));
|
|
|
|
zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints,
|
|
sizeof (*thiskey), array_numints, thiskey);
|
|
boolean_t match = bcmp(thiskey, zn->zn_key_orig,
|
|
array_numints * sizeof (*thiskey)) == 0;
|
|
kmem_free(thiskey, array_numints * sizeof (*thiskey));
|
|
return (match);
|
|
}
|
|
|
|
ASSERT(zn->zn_key_intlen == 1);
|
|
if (zn->zn_matchtype & MT_NORMALIZE) {
|
|
char *thisname = kmem_alloc(array_numints, KM_SLEEP);
|
|
|
|
zap_leaf_array_read(l, chunk, sizeof (char), array_numints,
|
|
sizeof (char), array_numints, thisname);
|
|
boolean_t match = zap_match(zn, thisname);
|
|
kmem_free(thisname, array_numints);
|
|
return (match);
|
|
}
|
|
|
|
/*
|
|
* Fast path for exact matching.
|
|
* First check that the lengths match, so that we don't read
|
|
* past the end of the zn_key_orig array.
|
|
*/
|
|
if (array_numints != zn->zn_key_orig_numints)
|
|
return (B_FALSE);
|
|
while (bseen < array_numints) {
|
|
struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
|
|
int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES);
|
|
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread))
|
|
break;
|
|
chunk = la->la_next;
|
|
bseen += toread;
|
|
}
|
|
return (bseen == array_numints);
|
|
}
|
|
|
|
/*
|
|
* Routines which manipulate leaf entries.
|
|
*/
|
|
|
|
int
|
|
zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
|
|
{
|
|
struct zap_leaf_entry *le;
|
|
|
|
ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
|
|
|
|
for (uint16_t *chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
|
|
*chunkp != CHAIN_END; chunkp = &le->le_next) {
|
|
uint16_t chunk = *chunkp;
|
|
le = ZAP_LEAF_ENTRY(l, chunk);
|
|
|
|
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
|
|
|
|
if (le->le_hash != zn->zn_hash)
|
|
continue;
|
|
|
|
/*
|
|
* NB: the entry chain is always sorted by cd on
|
|
* normalized zap objects, so this will find the
|
|
* lowest-cd match for MT_NORMALIZE.
|
|
*/
|
|
ASSERT((zn->zn_matchtype == 0) ||
|
|
(zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
|
|
if (zap_leaf_array_match(l, zn, le->le_name_chunk,
|
|
le->le_name_numints)) {
|
|
zeh->zeh_num_integers = le->le_value_numints;
|
|
zeh->zeh_integer_size = le->le_value_intlen;
|
|
zeh->zeh_cd = le->le_cd;
|
|
zeh->zeh_hash = le->le_hash;
|
|
zeh->zeh_chunkp = chunkp;
|
|
zeh->zeh_leaf = l;
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
return (SET_ERROR(ENOENT));
|
|
}
|
|
|
|
/* Return (h1,cd1 >= h2,cd2) */
|
|
#define HCD_GTEQ(h1, cd1, h2, cd2) \
|
|
((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))
|
|
|
|
int
|
|
zap_leaf_lookup_closest(zap_leaf_t *l,
|
|
uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
|
|
{
|
|
uint64_t besth = -1ULL;
|
|
uint32_t bestcd = -1U;
|
|
uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
|
|
struct zap_leaf_entry *le;
|
|
|
|
ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
|
|
|
|
for (uint16_t lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
|
|
for (uint16_t chunk = zap_leaf_phys(l)->l_hash[lh];
|
|
chunk != CHAIN_END; chunk = le->le_next) {
|
|
le = ZAP_LEAF_ENTRY(l, chunk);
|
|
|
|
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
|
|
|
|
if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
|
|
HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
|
|
ASSERT3U(bestlh, >=, lh);
|
|
bestlh = lh;
|
|
besth = le->le_hash;
|
|
bestcd = le->le_cd;
|
|
|
|
zeh->zeh_num_integers = le->le_value_numints;
|
|
zeh->zeh_integer_size = le->le_value_intlen;
|
|
zeh->zeh_cd = le->le_cd;
|
|
zeh->zeh_hash = le->le_hash;
|
|
zeh->zeh_fakechunk = chunk;
|
|
zeh->zeh_chunkp = &zeh->zeh_fakechunk;
|
|
zeh->zeh_leaf = l;
|
|
}
|
|
}
|
|
}
|
|
|
|
return (bestcd == -1U ? ENOENT : 0);
|
|
}
|
|
|
|
int
|
|
zap_entry_read(const zap_entry_handle_t *zeh,
|
|
uint8_t integer_size, uint64_t num_integers, void *buf)
|
|
{
|
|
struct zap_leaf_entry *le =
|
|
ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
|
|
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
|
|
|
|
if (le->le_value_intlen > integer_size)
|
|
return (SET_ERROR(EINVAL));
|
|
|
|
zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk,
|
|
le->le_value_intlen, le->le_value_numints,
|
|
integer_size, num_integers, buf);
|
|
|
|
if (zeh->zeh_num_integers > num_integers)
|
|
return (SET_ERROR(EOVERFLOW));
|
|
return (0);
|
|
|
|
}
|
|
|
|
int
|
|
zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen,
|
|
char *buf)
|
|
{
|
|
struct zap_leaf_entry *le =
|
|
ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
|
|
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
|
|
|
|
if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
|
|
zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8,
|
|
le->le_name_numints, 8, buflen / 8, buf);
|
|
} else {
|
|
zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
|
|
le->le_name_numints, 1, buflen, buf);
|
|
}
|
|
if (le->le_name_numints > buflen)
|
|
return (SET_ERROR(EOVERFLOW));
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
zap_entry_update(zap_entry_handle_t *zeh,
|
|
uint8_t integer_size, uint64_t num_integers, const void *buf)
|
|
{
|
|
zap_leaf_t *l = zeh->zeh_leaf;
|
|
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);
|
|
|
|
int delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
|
|
ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen);
|
|
|
|
if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
|
|
return (SET_ERROR(EAGAIN));
|
|
|
|
zap_leaf_array_free(l, &le->le_value_chunk);
|
|
le->le_value_chunk =
|
|
zap_leaf_array_create(l, buf, integer_size, num_integers);
|
|
le->le_value_numints = num_integers;
|
|
le->le_value_intlen = integer_size;
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
zap_entry_remove(zap_entry_handle_t *zeh)
|
|
{
|
|
zap_leaf_t *l = zeh->zeh_leaf;
|
|
|
|
ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);
|
|
|
|
uint16_t entry_chunk = *zeh->zeh_chunkp;
|
|
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
|
|
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
|
|
|
|
zap_leaf_array_free(l, &le->le_name_chunk);
|
|
zap_leaf_array_free(l, &le->le_value_chunk);
|
|
|
|
*zeh->zeh_chunkp = le->le_next;
|
|
zap_leaf_chunk_free(l, entry_chunk);
|
|
|
|
zap_leaf_phys(l)->l_hdr.lh_nentries--;
|
|
}
|
|
|
|
int
|
|
zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
|
|
uint8_t integer_size, uint64_t num_integers, const void *buf,
|
|
zap_entry_handle_t *zeh)
|
|
{
|
|
uint16_t chunk;
|
|
struct zap_leaf_entry *le;
|
|
uint64_t h = zn->zn_hash;
|
|
|
|
uint64_t valuelen = integer_size * num_integers;
|
|
|
|
int numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints *
|
|
zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
|
|
if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
|
|
return (SET_ERROR(E2BIG));
|
|
|
|
if (cd == ZAP_NEED_CD) {
|
|
/* find the lowest unused cd */
|
|
if (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
|
|
cd = 0;
|
|
|
|
for (chunk = *LEAF_HASH_ENTPTR(l, h);
|
|
chunk != CHAIN_END; chunk = le->le_next) {
|
|
le = ZAP_LEAF_ENTRY(l, chunk);
|
|
if (le->le_cd > cd)
|
|
break;
|
|
if (le->le_hash == h) {
|
|
ASSERT3U(cd, ==, le->le_cd);
|
|
cd++;
|
|
}
|
|
}
|
|
} else {
|
|
/* old unsorted format; do it the O(n^2) way */
|
|
for (cd = 0; ; cd++) {
|
|
for (chunk = *LEAF_HASH_ENTPTR(l, h);
|
|
chunk != CHAIN_END; chunk = le->le_next) {
|
|
le = ZAP_LEAF_ENTRY(l, chunk);
|
|
if (le->le_hash == h &&
|
|
le->le_cd == cd) {
|
|
break;
|
|
}
|
|
}
|
|
/* If this cd is not in use, we are good. */
|
|
if (chunk == CHAIN_END)
|
|
break;
|
|
}
|
|
}
|
|
/*
|
|
* We would run out of space in a block before we could
|
|
* store enough entries to run out of CD values.
|
|
*/
|
|
ASSERT3U(cd, <, zap_maxcd(zn->zn_zap));
|
|
}
|
|
|
|
if (zap_leaf_phys(l)->l_hdr.lh_nfree < numchunks)
|
|
return (SET_ERROR(EAGAIN));
|
|
|
|
/* make the entry */
|
|
chunk = zap_leaf_chunk_alloc(l);
|
|
le = ZAP_LEAF_ENTRY(l, chunk);
|
|
le->le_type = ZAP_CHUNK_ENTRY;
|
|
le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig,
|
|
zn->zn_key_intlen, zn->zn_key_orig_numints);
|
|
le->le_name_numints = zn->zn_key_orig_numints;
|
|
le->le_value_chunk =
|
|
zap_leaf_array_create(l, buf, integer_size, num_integers);
|
|
le->le_value_numints = num_integers;
|
|
le->le_value_intlen = integer_size;
|
|
le->le_hash = h;
|
|
le->le_cd = cd;
|
|
|
|
/* link it into the hash chain */
|
|
/* XXX if we did the search above, we could just use that */
|
|
uint16_t *chunkp = zap_leaf_rehash_entry(l, chunk);
|
|
|
|
zap_leaf_phys(l)->l_hdr.lh_nentries++;
|
|
|
|
zeh->zeh_leaf = l;
|
|
zeh->zeh_num_integers = num_integers;
|
|
zeh->zeh_integer_size = le->le_value_intlen;
|
|
zeh->zeh_cd = le->le_cd;
|
|
zeh->zeh_hash = le->le_hash;
|
|
zeh->zeh_chunkp = chunkp;
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Determine if there is another entry with the same normalized form.
|
|
* For performance purposes, either zn or name must be provided (the
|
|
* other can be NULL). Note, there usually won't be any hash
|
|
* conflicts, in which case we don't need the concatenated/normalized
|
|
* form of the name. But all callers have one of these on hand anyway,
|
|
* so might as well take advantage. A cleaner but slower interface
|
|
* would accept neither argument, and compute the normalized name as
|
|
* needed (using zap_name_alloc(zap_entry_read_name(zeh))).
|
|
*/
|
|
boolean_t
|
|
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
|
|
const char *name, zap_t *zap)
|
|
{
|
|
struct zap_leaf_entry *le;
|
|
boolean_t allocdzn = B_FALSE;
|
|
|
|
if (zap->zap_normflags == 0)
|
|
return (B_FALSE);
|
|
|
|
for (uint16_t chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
|
|
chunk != CHAIN_END; chunk = le->le_next) {
|
|
le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
|
|
if (le->le_hash != zeh->zeh_hash)
|
|
continue;
|
|
if (le->le_cd == zeh->zeh_cd)
|
|
continue;
|
|
|
|
if (zn == NULL) {
|
|
zn = zap_name_alloc(zap, name, MT_NORMALIZE);
|
|
allocdzn = B_TRUE;
|
|
}
|
|
if (zap_leaf_array_match(zeh->zeh_leaf, zn,
|
|
le->le_name_chunk, le->le_name_numints)) {
|
|
if (allocdzn)
|
|
zap_name_free(zn);
|
|
return (B_TRUE);
|
|
}
|
|
}
|
|
if (allocdzn)
|
|
zap_name_free(zn);
|
|
return (B_FALSE);
|
|
}
|
|
|
|
/*
|
|
* Routines for transferring entries between leafs.
|
|
*/
|
|
|
|
static uint16_t *
|
|
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
|
|
{
|
|
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
|
|
struct zap_leaf_entry *le2;
|
|
uint16_t *chunkp;
|
|
|
|
/*
|
|
* keep the entry chain sorted by cd
|
|
* NB: this will not cause problems for unsorted leafs, though
|
|
* it is unnecessary there.
|
|
*/
|
|
for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
|
|
*chunkp != CHAIN_END; chunkp = &le2->le_next) {
|
|
le2 = ZAP_LEAF_ENTRY(l, *chunkp);
|
|
if (le2->le_cd > le->le_cd)
|
|
break;
|
|
}
|
|
|
|
le->le_next = *chunkp;
|
|
*chunkp = entry;
|
|
return (chunkp);
|
|
}
|
|
|
|
static uint16_t
|
|
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
|
|
{
|
|
uint16_t new_chunk;
|
|
uint16_t *nchunkp = &new_chunk;
|
|
|
|
while (chunk != CHAIN_END) {
|
|
uint16_t nchunk = zap_leaf_chunk_alloc(nl);
|
|
struct zap_leaf_array *nla =
|
|
&ZAP_LEAF_CHUNK(nl, nchunk).l_array;
|
|
struct zap_leaf_array *la =
|
|
&ZAP_LEAF_CHUNK(l, chunk).l_array;
|
|
int nextchunk = la->la_next;
|
|
|
|
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
|
|
|
*nla = *la; /* structure assignment */
|
|
|
|
zap_leaf_chunk_free(l, chunk);
|
|
chunk = nextchunk;
|
|
*nchunkp = nchunk;
|
|
nchunkp = &nla->la_next;
|
|
}
|
|
*nchunkp = CHAIN_END;
|
|
return (new_chunk);
|
|
}
|
|
|
|
static void
|
|
zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
|
|
{
|
|
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
|
|
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
|
|
|
|
uint16_t chunk = zap_leaf_chunk_alloc(nl);
|
|
struct zap_leaf_entry *nle = ZAP_LEAF_ENTRY(nl, chunk);
|
|
*nle = *le; /* structure assignment */
|
|
|
|
(void) zap_leaf_rehash_entry(nl, chunk);
|
|
|
|
nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
|
|
nle->le_value_chunk =
|
|
zap_leaf_transfer_array(l, le->le_value_chunk, nl);
|
|
|
|
zap_leaf_chunk_free(l, entry);
|
|
|
|
zap_leaf_phys(l)->l_hdr.lh_nentries--;
|
|
zap_leaf_phys(nl)->l_hdr.lh_nentries++;
|
|
}
|
|
|
|
/*
|
|
* Transfer the entries whose hash prefix ends in 1 to the new leaf.
|
|
*/
|
|
void
|
|
zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
|
|
{
|
|
int bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len;
|
|
|
|
/* set new prefix and prefix_len */
|
|
zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1;
|
|
zap_leaf_phys(l)->l_hdr.lh_prefix_len++;
|
|
zap_leaf_phys(nl)->l_hdr.lh_prefix =
|
|
zap_leaf_phys(l)->l_hdr.lh_prefix | 1;
|
|
zap_leaf_phys(nl)->l_hdr.lh_prefix_len =
|
|
zap_leaf_phys(l)->l_hdr.lh_prefix_len;
|
|
|
|
/* break existing hash chains */
|
|
zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
|
|
2*ZAP_LEAF_HASH_NUMENTRIES(l));
|
|
|
|
if (sort)
|
|
zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
|
|
|
|
/*
|
|
* Transfer entries whose hash bit 'bit' is set to nl; rehash
|
|
* the remaining entries
|
|
*
|
|
* NB: We could find entries via the hashtable instead. That
|
|
* would be O(hashents+numents) rather than O(numblks+numents),
|
|
* but this accesses memory more sequentially, and when we're
|
|
* called, the block is usually pretty full.
|
|
*/
|
|
for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
|
|
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
|
|
if (le->le_type != ZAP_CHUNK_ENTRY)
|
|
continue;
|
|
|
|
if (le->le_hash & (1ULL << bit))
|
|
zap_leaf_transfer_entry(l, i, nl);
|
|
else
|
|
(void) zap_leaf_rehash_entry(l, i);
|
|
}
|
|
}
|
|
|
|
void
|
|
zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
|
|
{
|
|
int n = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
|
|
zap_leaf_phys(l)->l_hdr.lh_prefix_len;
|
|
n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
|
|
zs->zs_leafs_with_2n_pointers[n]++;
|
|
|
|
|
|
n = zap_leaf_phys(l)->l_hdr.lh_nentries/5;
|
|
n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
|
|
zs->zs_blocks_with_n5_entries[n]++;
|
|
|
|
n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
|
|
zap_leaf_phys(l)->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
|
|
(1<<FZAP_BLOCK_SHIFT(zap));
|
|
n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
|
|
zs->zs_blocks_n_tenths_full[n]++;
|
|
|
|
for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
|
|
int nentries = 0;
|
|
int chunk = zap_leaf_phys(l)->l_hash[i];
|
|
|
|
while (chunk != CHAIN_END) {
|
|
struct zap_leaf_entry *le =
|
|
ZAP_LEAF_ENTRY(l, chunk);
|
|
|
|
n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) +
|
|
ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints *
|
|
le->le_value_intlen);
|
|
n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
|
|
zs->zs_entries_using_n_chunks[n]++;
|
|
|
|
chunk = le->le_next;
|
|
nentries++;
|
|
}
|
|
|
|
n = nentries;
|
|
n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
|
|
zs->zs_buckets_with_n_entries[n]++;
|
|
}
|
|
}
|