Skip to content

Commit

Permalink
save digest of zfile's header/trailer and index
Browse files Browse the repository at this point in the history
Signed-off-by: Yifan Yuan <tuji.yyf@alibaba-inc.com>
  • Loading branch information
BigVan committed Sep 15, 2023
1 parent cc42223 commit 7315c31
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 28 deletions.
15 changes: 9 additions & 6 deletions src/overlaybd/zfile/format_spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ The format of header is described as below. All fields are little-endian.
| :---: | :----: | :----: | :--- |
| magic0 | 0 | 8 | "ZFile\0\1" (and an implicit '\0') |
| magic1 | 8 | 16 | 74 75 6A 69, 2E 79 79 66, 40 41 6C 69, 62 61 62 61 |
| size | 24 | uint32_t | size of the header struct (108), excluding the tail padding |
| reserved| 28 | 4 | reserved space, should be 0 |
| size | 24 | uint32_t | size of the header structure, excluding the tail padding |
| digest | 28 | uint32_t | checksum for the range 28-511 bytes in header |
| flags | 32 | uint64_t | bits for flags* (see later for details) |
| index_offset | 40 | uint64_t | index offset |
| index_size | 48 | uint64_t | size of the index section, possibly compressed|
| index_size | 48 | uint64_t | size of the index section, possibly compressed depending on flags |
| original_file_size | 56 | uint64_t | size of the original file before compression |
| reserved| 64 | 8 | reserved space, should be 0 |
| block_size | 72 | uint32_t | size of each compression block |
| index_crc | 64 | uint32_t | checksum value of index |
| reserved| 68 | 4 | reserved space, should be 0 |
| block_size| 72 | uint32_t | size of each compression block |
| algo | 76 | uint8_t | compression algorithm |
| level | 77 | uint8_t | compression level |
| use_dict| 78 | bool | whether use dictionary |
Expand All @@ -45,7 +46,9 @@ The format of header is described as below. All fields are little-endian.
| type | 1 | this is a data file (1) or index file (0) |
| sealed | 2 | this file is sealed (1) or not (0) |
| info_valid | 3 | information validity of the fields *after* flags (they were initially invalid (0) after creation; and readers must resort to trailer when they meet such headers) |
| reserved | 4~63 | reserved for future use; must be 0s |
| digest | 4 | the digest of this header/trailer has been recorded in the digest field |
| index_compression | 5 | whether the index has been compressed (1) or not (0) |
| reserved | 6~63 | reserved for future use; must be 0s |


## index
Expand Down
32 changes: 32 additions & 0 deletions src/overlaybd/zfile/test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,38 @@ TEST_F(ZFileTest, validation_check) {
EXPECT_NE(zfile_validation_check(fdst.get()), 0);
}

// Builds a small zfile with verification enabled, overwrites a few bytes at
// offset 400 of the output, and expects both zfile_validation_check() and
// is_zfile() to reject the result.
TEST_F(ZFileTest, ht_check) {
    // log_output_level = 1;
    auto fn_src = "verify.data";
    auto fn_zfile = "verify.zfile";
    auto src = lfs->open(fn_src, O_CREAT | O_TRUNC | O_RDWR /*| O_DIRECT */, 0644);
    unique_ptr<IFile> fsrc(src);
    if (!fsrc) {
        LOG_ERROR("err: `(`)", errno, strerror(errno));
        // Bail out: everything below dereferences fsrc.
        return;
    }
    randwrite(fsrc.get(), 1024);
    struct stat _st;
    if (fsrc->fstat(&_st) != 0) {
        LOG_ERROR("err: `(`)", errno, strerror(errno));
        return;
    }
    auto dst = lfs->open(fn_zfile, O_CREAT | O_TRUNC | O_RDWR /*| O_DIRECT */, 0644);
    unique_ptr<IFile> fdst(dst);
    if (!fdst) {
        LOG_ERROR("err: `(`)", errno, strerror(errno));
        // Bail out: everything below dereferences fdst/dst.
        return;
    }
    CompressOptions opt;
    opt.algo = CompressOptions::LZ4;
    opt.verify = 1;
    CompressArgs args(opt);
    int ret = zfile_compress(fsrc.get(), fdst.get(), &args);
    EXPECT_EQ(ret, 0);
    // Corrupt the file in place. NOTE(review): offset 400 is assumed to fall
    // inside the header area covered by the digest -- confirm against
    // HeaderTrailer::SPACE.
    auto x = 2324;
    // The corruption must actually land, otherwise the checks below would be
    // exercising an intact file.
    EXPECT_EQ(dst->pwrite(&x, sizeof(x), 400), (ssize_t)sizeof(x));
    EXPECT_NE(zfile_validation_check(fdst.get()), 0);
    EXPECT_EQ(is_zfile(dst), -1);
}

TEST_F(ZFileTest, dsa) {
const int buf_size = 1024;
const int crc_count = 3000;
Expand Down
91 changes: 69 additions & 22 deletions src/overlaybd/zfile/zfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,16 @@ class CompressionFile : public VirtualReadOnlyFile {

// offset 24, 28, 32
uint32_t size = sizeof(HeaderTrailer);
uint32_t __padding = 0;
// uint32_t __padding = 0;
uint32_t digest = 0;
uint64_t flags;

static const uint32_t FLAG_SHIFT_HEADER = 0; // 1:header 0:trailer
static const uint32_t FLAG_SHIFT_TYPE = 1; // 1:data file, 0:index file
static const uint32_t FLAG_SHIFT_SEALED = 2; // 1:YES, 0:NO
static const uint32_t FLAG_SHIFT_HEADER_OVERWRITE = 3;
static const uint32_t FLAG_SHIFT_HEADER_OVERWRITE = 3; // overwrite trailer info to header
static const uint32_t FLAG_SHIFT_CALC_DIGEST = 4; // calculate digest for zfile header/trailer and jumptable
static const uint32_t FLAG_SHIFT_IDX_COMP = 5; // compress zfile index(jumptable)

uint32_t get_flag_bit(uint32_t shift) const {
return flags & (1 << shift);
Expand Down Expand Up @@ -121,6 +124,17 @@ class CompressionFile : public VirtualReadOnlyFile {
// True when the FLAG_SHIFT_SEALED bit is set in `flags`.
bool is_sealed() const {
    return get_flag_bit(FLAG_SHIFT_SEALED) != 0;
}
bool is_digest_enabled() {
return get_flag_bit(FLAG_SHIFT_CALC_DIGEST);
}
// Verifies the stored digest of this header/trailer.
// Returns true when no digest was recorded (flag not set) or when the
// CRC32C computed over SPACE bytes starting at `this`, with the digest field
// zeroed, equals the stored digest. The digest field is restored on exit.
bool is_valid() {
    if (is_digest_enabled()) {
        const auto expected = this->digest;
        // The checksum is computed with the digest field itself set to 0.
        this->digest = 0;
        DEFER(this->digest = expected;);
        const auto actual = crc32::crc32c(this, CompressionFile::HeaderTrailer::SPACE);
        return actual == expected;
    }
    // Nothing recorded, nothing to verify.
    return true;
}
void set_header() {
set_flag_bit(FLAG_SHIFT_HEADER);
}
Expand All @@ -143,6 +157,14 @@ class CompressionFile : public VirtualReadOnlyFile {
set_flag_bit(FLAG_SHIFT_HEADER_OVERWRITE);
}

void set_digest_enable() {
set_flag_bit(FLAG_SHIFT_CALC_DIGEST);
}

void set_compress_index() {
set_flag_bit(FLAG_SHIFT_IDX_COMP);
}

// Copies the given compression options into this header/trailer's `opt` member.
void set_compress_option(const CompressOptions &opt) {
this->opt = opt;
}
Expand All @@ -151,7 +173,8 @@ class CompressionFile : public VirtualReadOnlyFile {
uint64_t index_offset; // in bytes
uint64_t index_size; // # of SegmentMappings
uint64_t original_file_size;
uint64_t reserved_0;
uint32_t index_crc;
uint32_t reserved_0;
// offset 72
CompressOptions opt;

Expand Down Expand Up @@ -181,7 +204,8 @@ class CompressionFile : public VirtualReadOnlyFile {
return deltas.size();
}

int build(const uint32_t *ibuf, size_t n, off_t offset_begin, uint32_t block_size) {
int build(const uint32_t *ibuf, size_t n, off_t offset_begin, uint32_t block_size,
bool enable_crc) {
partial_offset.clear();
deltas.clear();
group_size = (uinttype_max + 1) / block_size;
Expand All @@ -190,7 +214,11 @@ class CompressionFile : public VirtualReadOnlyFile {
auto raw_offset = offset_begin;
partial_offset.push_back(raw_offset);
deltas.push_back(0);
size_t min_blksize = (enable_crc ? sizeof(uint32_t) : 0);
for (ssize_t i = 1; i < (ssize_t)n + 1; i++) {
if (ibuf[i - 1] <= min_blksize) {
LOG_ERRNO_RETURN(EIO, -1, "unexpected block size(id: `):", i - 1, ibuf[i - 1]);
}
raw_offset += ibuf[i - 1];
if ((i % group_size) == 0) {
partial_offset.push_back(raw_offset);
Expand All @@ -199,7 +227,7 @@ class CompressionFile : public VirtualReadOnlyFile {
}
if ((uint64_t)deltas[i - 1] + ibuf[i - 1] >= (uint64_t)uinttype_max) {
LOG_ERROR_RETURN(ERANGE, -1, "build block[`] length failed `+` > ` (exceed)",
i-1, deltas[i-1], ibuf[i-1], (uint64_t)uinttype_max);
i - 1, deltas[i - 1], ibuf[i - 1], (uint64_t)uinttype_max);
}
deltas.push_back(deltas[i - 1] + ibuf[i - 1]);
}
Expand Down Expand Up @@ -260,13 +288,14 @@ class CompressionFile : public VirtualReadOnlyFile {
LOG_WARN("trim and reload. (idx: `, offset: `, len: `)", idx, begin_offset, read_size);
int trim_res = m_zfile->m_file->trim(begin_offset, read_size);
if (trim_res < 0) {
LOG_ERRNO_RETURN(0, -1, "trim block failed. (idx: `, offset: `, len: `)",
idx, begin_offset, read_size);
LOG_ERRNO_RETURN(0, -1, "trim block failed. (idx: `, offset: `, len: `)", idx,
begin_offset, read_size);
}
auto readn = m_zfile->m_file->pread(m_buf + m_buf_offset, read_size, begin_offset);
if (readn != (ssize_t)read_size) {
LOG_ERRNO_RETURN(0, -1, "read compressed blocks failed. (idx: `, offset: `, len: `)",
idx, begin_offset, read_size);
LOG_ERRNO_RETURN(0, -1,
"read compressed blocks failed. (idx: `, offset: `, len: `)", idx,
begin_offset, read_size);
}
return 0;
}
Expand Down Expand Up @@ -351,8 +380,9 @@ class CompressionFile : public VirtualReadOnlyFile {
compressed_size = m_reader->compressed_size();
if ((size_t)(m_reader->m_buf_offset) + compressed_size > sizeof(m_buf)) {
m_reader->m_eno = ERANGE;
LOG_ERRNO_RETURN(0, -1, "inner buffer offset (`) + compressed size (`) overflow.",
m_reader->m_buf_offset, compressed_size);
LOG_ERRNO_RETURN(0, -1,
"inner buffer offset (`) + compressed size (`) overflow.",
m_reader->m_buf_offset, compressed_size);
}

if (blk_idx == m_reader->m_begin_idx) {
Expand Down Expand Up @@ -439,15 +469,15 @@ class CompressionFile : public VirtualReadOnlyFile {
if (count <= 0)
return 0;
if (offset + count > m_ht.original_file_size) {
LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)",
offset + count, m_ht.original_file_size);
LOG_ERRNO_RETURN(ERANGE, -1, "pread range exceed (` > `)", offset + count,
m_ht.original_file_size);
}
ssize_t readn = 0; // final will equal to count
unsigned char raw[MAX_READ_SIZE];
BlockReader br(this, offset, count);
for (auto &block : br) {
if (buf == nullptr) {
//used for prefetch; no copy, no decompress;
// used for prefetch; no copy, no decompress;
readn += block.cp_len;
continue;
}
Expand Down Expand Up @@ -506,7 +536,7 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo
CompressionFile::HeaderTrailer *pht, off_t offset = -1);

ssize_t compress_data(ICompressor *compressor, const unsigned char *buf, size_t count,
unsigned char *dest_buf, size_t dest_len, bool gen_crc) {
unsigned char *dest_buf, size_t dest_len, bool gen_crc) {

ssize_t compressed_len = 0;
auto ret = compressor->compress((const unsigned char *)buf, count, dest_buf, dest_len);
Expand Down Expand Up @@ -584,6 +614,7 @@ class ZFileBuilder : public VirtualReadOnlyFile {
LOG_ERRNO_RETURN(0, -1, "failed to write index.");
}
auto pht = (CompressionFile::HeaderTrailer *)m_ht;
pht->index_crc = crc32::crc32c(&m_block_len[0], index_bytes);
pht->index_offset = index_offset;
pht->index_size = index_size;
pht->original_file_size = raw_data_size;
Expand Down Expand Up @@ -674,7 +705,9 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile
if (!pht->verify_magic() || !pht->is_header()) {
LOG_ERROR_RETURN(0, false, "header magic/type don't match");
}

if (pht->is_valid() == false) {
LOG_ERROR_RETURN(0, false, "digest verification failed.");
}
struct stat stat;
ret = file->fstat(&stat);
if (ret < 0) {
Expand Down Expand Up @@ -720,12 +753,20 @@ bool load_jump_table(IFile *file, CompressionFile::HeaderTrailer *pheader_traile
if (ret < (ssize_t)index_bytes) {
LOG_ERRNO_RETURN(0, false, "failed to read index");
}
if (pht->is_digest_enabled()) {
LOG_INFO("check jumptable CRC32 (` expected)", pht->index_crc);
auto crc = crc32::crc32c(ibuf.get(), index_bytes);
if (crc != pht->index_crc) {
LOG_ERRNO_RETURN(0, false, "checksum of jumptable is incorrect");
}
}
ret = jump_table.build(ibuf.get(), pht->index_size,
CompressionFile::HeaderTrailer::SPACE + pht->opt.dict_size,
pht->opt.block_size);
CompressionFile::HeaderTrailer::SPACE + pht->opt.dict_size,
pht->opt.block_size, pht->opt.verify);
if (ret != 0) {
LOG_ERRNO_RETURN(0, false, "failed to build jump table");
}

if (pheader_trailer)
*pheader_trailer = *pht;
return true;
Expand All @@ -745,8 +786,7 @@ IFile *zfile_open_ro(IFile *file, bool verify, bool ownership) {
auto res = file->fallocate(0, 0, -1);
LOG_ERROR("failed to load jump table, fallocate result: `", res);
if (res < 0) {
LOG_ERRNO_RETURN(0, nullptr,
"failed to load jump table and failed to evict");
LOG_ERRNO_RETURN(0, nullptr, "failed to load jump table and failed to evict");
}
if (retry--) {
LOG_INFO("retry loading jump table");
Expand Down Expand Up @@ -787,7 +827,10 @@ static int write_header_trailer(IFile *file, bool is_header, bool is_sealed, boo
if (offset != -1)
pht->set_header_overwrite();

LOG_INFO("pht->opt.dict_size: `", pht->opt.dict_size);
pht->set_digest_enable(); // by default
pht->digest = 0;
pht->digest = crc32::crc32c(pht, CompressionFile::HeaderTrailer::SPACE);
LOG_INFO("save header/trailer with digest: `", pht->digest);
if (offset == -1) {
return (int)file->write(pht, CompressionFile::HeaderTrailer::SPACE);
}
Expand All @@ -812,7 +855,6 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) {
char buf[CompressionFile::HeaderTrailer::SPACE] = {};
auto pht = new (buf) CompressionFile::HeaderTrailer;
pht->set_compress_option(opt);

LOG_INFO("write header.");
auto ret = write_header_trailer(as, true, false, true, pht);
if (ret < 0) {
Expand Down Expand Up @@ -884,6 +926,7 @@ int zfile_compress(IFile *file, IFile *as, const CompressArgs *args) {
if (as->write(&block_len[0], index_bytes) != index_bytes) {
LOG_ERRNO_RETURN(0, -1, "failed to write index.");
}
pht->index_crc = crc32::crc32c(&block_len[0], index_bytes);
pht->index_offset = index_offset;
pht->index_size = index_size;
pht->original_file_size = raw_data_size;
Expand Down Expand Up @@ -962,6 +1005,10 @@ int is_zfile(IFile *file) {
LOG_DEBUG("file: ` is not a zfile object", file);
return 0;
}
if (!pht->is_valid()) {
LOG_ERRNO_RETURN(0, -1,
"file: ` is a zfile object but verify digest failed.", file);
}
LOG_DEBUG("file: ` is a zfile object", file);
return 1;
}
Expand Down

0 comments on commit 7315c31

Please sign in to comment.