-
Notifications
You must be signed in to change notification settings - Fork 3
Using and extending the code
Here is how to compress a block to a file and decompress it back from that file, using RLT+TEXT as the transform, Huffman as the entropy codec, a block size of 1 MB, 4 jobs and a checksum.
Just create a CompressedOutputStream/CompressedInputStream to write/read compressed data.
Notice that CompressedOutputStream is a std::ostream and CompressedInputStream is a std::istream, so most operations on streams work as usual.
#include <fstream>
#include <iostream>
#include "types.hpp"
#include "InputStream.hpp"
#include "OutputStream.hpp"
#include "io/CompressedInputStream.hpp"
#include "io/CompressedOutputStream.hpp"
using namespace kanzi;
using namespace std;
// Compresses `length` bytes of `block` into the file "compressed.knz".
//
// The stream is configured with the HUFFMAN entropy codec, the RLT+TEXT
// transform and the arguments (1024 * 1024, true, 4), which the accompanying
// text describes as a 1 MB block size, a checksum and 4 jobs.
//
// Returns the value reported by CompressedOutputStream::getWritten() after
// the stream has been closed.
uint64 testCompress(byte block[], uint length) {
    // The file stream is a stack object rather than a new/delete pair, so it
    // is closed and released on every exit path, including when the
    // compressed stream throws.
    std::ofstream os("compressed.knz", std::ofstream::out | std::ofstream::binary);

    // Wrap the file in a CompressedOutputStream
    CompressedOutputStream cos(os, "HUFFMAN", "RLT+TEXT", 1024 * 1024, true, 4);

    // Compress the block
    cos.write(reinterpret_cast<const char*>(block), length);

    // Close the compressed stream before querying the byte count
    cos.close();

    return cos.getWritten();
}
// Decompresses up to `length` bytes from the file "compressed.knz" into
// `block`, using a CompressedInputStream constructed with the argument 4
// (the number of jobs, per the accompanying text).
//
// Returns the value reported by CompressedInputStream::getRead() after the
// stream has been closed.
uint64 testDecompress(byte block[], uint length) {
    // The file stream is a stack object rather than a new/delete pair, so it
    // is closed and released on every exit path, including when the
    // compressed stream throws.
    std::ifstream is("compressed.knz", std::ifstream::in | std::ifstream::binary);

    // Wrap the file in a CompressedInputStream
    CompressedInputStream cis(is, 4);

    // Decompress the block
    cis.read(reinterpret_cast<char*>(block), length);

    // Close the compressed stream before querying the byte count
    cis.close();

    return cis.getRead();
}
// Reads up to 64 KB from /tmp/enwik8, compresses that block with
// testCompress(), reports the size change, then decompresses it again with
// testDecompress().
//
// Returns 1 when the input file cannot be opened, 0 otherwise.
int main(int argc, const char** argv)
{
    byte block[65536];

    // A stream object closes the input file automatically on every path.
    std::ifstream in("/tmp/enwik8", std::ifstream::in | std::ifstream::binary);

    if (!in) {
        std::cerr << "Cannot open /tmp/enwik8" << std::endl;
        return 1;
    }

    in.read(reinterpret_cast<char*>(block), sizeof(block));

    // gcount() is the number of bytes actually read (may be less than 64 KB).
    const int sz = static_cast<int>(in.gcount());
    in.close();

    if (sz > 0) {
        uint64 c = testCompress(block, sz);

        // Report the original size (sz) and the compressed size (c).
        std::cout << "Block compressed from " << sz << " bytes to " << c << " bytes" << std::endl;
        testDecompress(block, sz);
    }

    return 0;
}
Kanzi exposes a C API (see api/libapi.hpp) and can be built as a static (.a) or a dynamic library (.so/.dll).
Below is an example of a C program compressing and decompressing data using the C API.
/* EG. gcc testAPI.c -o r:\testAPI.exe -lkanzi */
/* EG. gcc testAPI.c -o testAPI -lkanzi */
#include "api/libapi.hpp"
#include <stdlib.h>
/*
 * Decompresses /tmp/enwik8.knz into /tmp/enwik8.knz.bak through the C API,
 * one buffer of at most 4 MB at a time, and prints the accumulated input and
 * output sizes when the decompressor is disposed successfully.
 *
 * Returns 0 on success. Returns -1 when a file cannot be opened or the
 * buffer cannot be allocated; otherwise returns the code reported by
 * initDecompressor() or disposeDecompressor().
 */
int testDecompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct dData dd = { blkSize, 4, 0 };
    struct dContext* ctx;
    int res = 0;
    FILE* in = fopen("/tmp/enwik8.knz", "rb");

    if (in == NULL)
        return -1;

    if ((res = initDecompressor(&dd, in, &ctx)) != 0) {
        fclose(in);
        return res;
    }

    FILE* out = fopen("/tmp/enwik8.knz.bak", "wb");
    BYTE* dst = (BYTE*)malloc(blkSize);
    int inSize = 0, outSize = 0;
    const int ready = (out != NULL) && (dst != NULL);

    if (ready) {
        int status = 0;

        do {
            int r = 0;
            int w = blkSize;

            /* Keep the status code apart from r: r receives the number of
             * bytes consumed, and the original overwrote it with the return
             * value, so inSize ended up summing status codes. */
            status = decompress(ctx, dst, &r, &w);
            inSize += r;

            /* As in the original, whatever was produced is written before
             * the status is examined.
             * NOTE(review): confirm against libapi whether the last, partial
             * block is reported with a non-zero status. */
            if (w > 0)
                w = (int)fwrite(dst, 1, (size_t)w, out);

            if (w <= 0)
                break;

            outSize += w;
        } while (status == 0);
    }

    /* The context is always disposed, even when setup failed above. */
    res = disposeDecompressor(ctx);

    if (!ready) {
        res = -1;
    } else if (res == 0) {
        printf("Size before decompression: %i bytes(s)\n", inSize);
        printf("Size after decompression: %i bytes(s)\n", outSize);
    }

    fclose(in);

    if (out != NULL)
        fclose(out);

    free(dst); /* free(NULL) is a no-op */
    return res;
}
/*
 * Compresses /tmp/enwik8 into /tmp/enwik8.knz through the C API, reading the
 * input in buffers of at most 4 MB, and prints the accumulated input and
 * output sizes on success.
 *
 * Returns 0 on success. Returns -1 when a file cannot be opened or the
 * buffer cannot be allocated; otherwise returns the first non-zero code
 * reported by initCompressor(), compress() or disposeCompressor().
 */
int testCompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct cData cd = { "BWT+RANK+MTFT", "FPAQ", blkSize, 4, 1, 0 };
    struct cContext* ctx;
    int res = 0;
    FILE* in = fopen("/tmp/enwik8", "rb");

    if (in == NULL)
        return -1;

    /* The output file must exist before initCompressor() is called: the
     * original passed `out` to it before the variable was declared. */
    FILE* out = fopen("/tmp/enwik8.knz", "wb");

    if (out == NULL) {
        fclose(in);
        return -1;
    }

    if ((res = initCompressor(&cd, out, &ctx)) != 0) {
        fclose(in);
        fclose(out);
        return res;
    }

    int r = 0, w = 0, inSize = 0, outSize = 0;
    BYTE* src = (BYTE*)malloc(blkSize);

    if (src == NULL) {
        res = -1;
    } else {
        while ((r = (int)fread(src, 1, blkSize, in)) != 0) {
            if ((res = compress(ctx, src, &r, &w)) != 0)
                break;

            inSize += r;
            outSize += w;
        }
    }

    /* Always dispose of the context, but do not let a successful dispose
     * hide an earlier failure. */
    const int disposeRes = disposeCompressor(ctx, &w);

    if (res == 0)
        res = disposeRes;

    if (res == 0) {
        /* Add the byte count that disposeCompressor() reported in w. */
        outSize += w;
        printf("Size before compression: %i bytes(s)\n", inSize);
        printf("Size after compression: %i bytes(s)\n", outSize);
    }

    fclose(in);
    fclose(out);
    free(src); /* free(NULL) is a no-op */
    return res;
}
/*
 * Runs the compression example followed by the decompression example.
 * Stops at the first step that fails and returns its error code;
 * returns 0 when both steps succeed.
 */
int main(int argc, const char** argv)
{
    const int compressStatus = testCompress();

    if (compressStatus != 0)
        return compressStatus;

    /* The result of the second step is the result of the program. */
    return testDecompress();
}
Here is how to implement and add a new transform to kanzi.
- Step 1: write the transform code
For example:
#include "../Context.hpp"
#include "../Transform.hpp"
// Example transform: XORs every byte with 0xAA. Applying the same operation
// twice restores the original data, so forward() and inverse() share one
// implementation. The class has no data members.
class SuperDuperTransform : public Transform<byte>
{
public:
    SuperDuperTransform() {}

    // Constructor taking the application context (unused by this example).
    SuperDuperTransform(Context&) {}

    ~SuperDuperTransform() {}

    // Transforms `length` bytes from `input` into `output` and advances both
    // slice indexes by `length`. Returns false, leaving both slices
    // untouched, when `length` is negative or the destination is too small.
    bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW {
        return xorBlock(input, output, length);
    }

    // Reverses forward(): same contract, same checks.
    bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW {
        return xorBlock(input, output, length);
    }

    // The transform never expands the data: output size equals input size.
    int getMaxEncodedLength(int inputLen) const { return inputLen; }

private:
    // Shared body of forward() and inverse().
    bool xorBlock(SliceArray<byte>& input, SliceArray<byte>& output, int length) const {
        // A negative length would move the slice indexes backwards
        if (length < 0)
            return false;

        // Ensure enough room in the destination buffer. The original
        // inverse() skipped this check and could write past the end.
        if (output._length - output._index < getMaxEncodedLength(length))
            return false;

        byte* src = &input._array[input._index];
        byte* dst = &output._array[output._index];

        for (int i = 0; i < length; i++)
            dst[i] = src[i] ^ byte(0xAA);

        input._index += length;
        output._index += length;
        return true;
    }
};
Always provide a constructor that takes a Context: the context contains all the application-wide information (such as block size, number of jobs, input and output names, etc.). Always inherit from Transform<T> and respect the maximum number of jobs provided in the context. Implement the forward and inverse methods as well as getMaxEncodedLength(int). Do not write to stdout or stderr. Be aware that your code must be thread-safe.
- Step 2: Register the transform in transform/TransformFactory.hpp
Add the type, say
// Type identifier for the new transform, to be added to TransformFactory.
// NOTE(review): 63 is the example value; confirm it does not collide with an
// existing transform type constant.
static const uint64 SUPERDUPER_TYPE = 63;
Let us say you use the name "SUPERDUPER" for the transform. Update the following methods:
template <class T> uint64 TransformFactory<T>::getTypeToken(const char* tName) THROW
template <class T> Transform<T>* TransformFactory<T>::newToken(Context& ctx, uint64 functionType) THROW
template <class T> const char* TransformFactory<T>::getNameToken(uint64 functionType) THROW
- Step 3: Update the help message in app/Kanzi.cpp
In Kanzi::printHelp, add the SUPERDUPER transform to the list in the -t option section.
- Step 4: Update the makefile and rebuild the binary
- That is it. For example, run:
kanzi -i foo.txt -f -t SUPERDUPER -e none -j 2 -v 4