mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-01 05:49:35 +03:00
a896468c78
Authored by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Alex Reece <alex@delphix.com> Reviewed by: Yuri Pankov <yuri.pankov@gmail.com> Approved by: Robert Mustacchi <rm@joyent.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Ported-by: Giuseppe Di Natale <dinatale2@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/8067 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/8173085 Closes #6319
155 lines
4.2 KiB
C
155 lines
4.2 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* This file and its contents are supplied under the terms of the
|
|
* Common Development and Distribution License ("CDDL"), version 1.0.
|
|
* You may only use this file in accordance with the terms of version
|
|
* 1.0 of the CDDL.
|
|
*
|
|
* A full copy of the text of the CDDL should have accompanied this
|
|
* source. A copy of the CDDL is also available via the Internet at
|
|
* http://www.illumos.org/license/CDDL.
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
|
*/
|
|
|
|
#include <sys/zfs_context.h>
|
|
#include <sys/zio.h>
|
|
#include <sys/zio_compress.h>
|
|
|
|
/*
 * Embedded-data Block Pointers
 *
 * Normally, block pointers point (via their DVAs) to a block which holds data.
 * If the data that we need to store is very small, this is an inefficient
 * use of space, because a block must be at minimum 1 sector (typically 512
 * bytes or 4KB).  Additionally, reading these small blocks tends to generate
 * more random reads.
 *
 * Embedded-data Block Pointers allow small pieces of data (the "payload",
 * up to 112 bytes) to be stored in the block pointer itself, instead of
 * being pointed to.  The "Pointer" part of this name is a bit of a
 * misnomer, as nothing is pointed to.
 *
 * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to
 * be embedded in the block pointer.  The logic for this is handled in
 * the SPA, by the zio pipeline.  Therefore most code outside the zio
 * pipeline doesn't need special-cases to handle these block pointers.
 *
 * See spa.h for details on the exact layout of embedded block pointers.
 */
|
|
|
|
void
|
|
encode_embedded_bp_compressed(blkptr_t *bp, void *data,
|
|
enum zio_compress comp, int uncompressed_size, int compressed_size)
|
|
{
|
|
uint64_t *bp64 = (uint64_t *)bp;
|
|
uint64_t w = 0;
|
|
uint8_t *data8 = data;
|
|
int i;
|
|
|
|
ASSERT3U(compressed_size, <=, BPE_PAYLOAD_SIZE);
|
|
ASSERT(uncompressed_size == compressed_size ||
|
|
comp != ZIO_COMPRESS_OFF);
|
|
ASSERT3U(comp, >=, ZIO_COMPRESS_OFF);
|
|
ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS);
|
|
|
|
bzero(bp, sizeof (*bp));
|
|
BP_SET_EMBEDDED(bp, B_TRUE);
|
|
BP_SET_COMPRESS(bp, comp);
|
|
BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
|
|
BPE_SET_LSIZE(bp, uncompressed_size);
|
|
BPE_SET_PSIZE(bp, compressed_size);
|
|
|
|
/*
|
|
* Encode the byte array into the words of the block pointer.
|
|
* First byte goes into low bits of first word (little endian).
|
|
*/
|
|
for (i = 0; i < compressed_size; i++) {
|
|
BF64_SET(w, (i % sizeof (w)) * NBBY, NBBY, data8[i]);
|
|
if (i % sizeof (w) == sizeof (w) - 1) {
|
|
/* we've reached the end of a word */
|
|
ASSERT3P(bp64, <, bp + 1);
|
|
*bp64 = w;
|
|
bp64++;
|
|
if (!BPE_IS_PAYLOADWORD(bp, bp64))
|
|
bp64++;
|
|
w = 0;
|
|
}
|
|
}
|
|
/* write last partial word */
|
|
if (bp64 < (uint64_t *)(bp + 1))
|
|
*bp64 = w;
|
|
}
|
|
|
|
/*
|
|
* buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
|
|
* more than BPE_PAYLOAD_SIZE bytes).
|
|
*/
|
|
void
|
|
decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
|
|
{
|
|
int psize;
|
|
uint8_t *buf8 = buf;
|
|
uint64_t w = 0;
|
|
const uint64_t *bp64 = (const uint64_t *)bp;
|
|
int i;
|
|
|
|
ASSERT(BP_IS_EMBEDDED(bp));
|
|
|
|
psize = BPE_GET_PSIZE(bp);
|
|
|
|
/*
|
|
* Decode the words of the block pointer into the byte array.
|
|
* Low bits of first word are the first byte (little endian).
|
|
*/
|
|
for (i = 0; i < psize; i++) {
|
|
if (i % sizeof (w) == 0) {
|
|
/* beginning of a word */
|
|
ASSERT3P(bp64, <, bp + 1);
|
|
w = *bp64;
|
|
bp64++;
|
|
if (!BPE_IS_PAYLOADWORD(bp, bp64))
|
|
bp64++;
|
|
}
|
|
buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Fill in the buffer with the (decompressed) payload of the embedded
|
|
* blkptr_t. Takes into account compression and byteorder (the payload is
|
|
* treated as a stream of bytes).
|
|
* Return 0 on success, or ENOSPC if it won't fit in the buffer.
|
|
*/
|
|
int
|
|
decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen)
|
|
{
|
|
int lsize, psize;
|
|
|
|
ASSERT(BP_IS_EMBEDDED(bp));
|
|
|
|
lsize = BPE_GET_LSIZE(bp);
|
|
psize = BPE_GET_PSIZE(bp);
|
|
|
|
if (lsize > buflen)
|
|
return (ENOSPC);
|
|
ASSERT3U(lsize, ==, buflen);
|
|
|
|
if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
|
|
uint8_t dstbuf[BPE_PAYLOAD_SIZE];
|
|
decode_embedded_bp_compressed(bp, dstbuf);
|
|
VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp),
|
|
dstbuf, buf, psize, buflen));
|
|
} else {
|
|
ASSERT3U(lsize, ==, psize);
|
|
decode_embedded_bp_compressed(bp, buf);
|
|
}
|
|
|
|
return (0);
|
|
}
|