diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 875b2b56b87f..0e2dea2635a1 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -220,6 +220,9 @@ line of text and contains the following stats separated by whitespace:
  pages_compacted  the number of pages freed during compaction
  huge_pages	  the number of incompressible pages
 
+ dup_data_size	  the amount of compressed data saved by deduplication
+ meta_data_size	  the amount of metadata allocated for the deduplication feature
+
 9) Deactivate:
 	swapoff /dev/zram0
 	umount /dev/zram1
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
index 9e2b79e9a990..29cb0082bb0c 100644
--- a/drivers/block/zram/Makefile
+++ b/drivers/block/zram/Makefile
@@ -1,3 +1,3 @@
-zram-y	:=	zcomp.o zram_drv.o
+zram-y	:=	zcomp.o zram_drv.o zram_dedup.o
 
 obj-$(CONFIG_ZRAM)	+=	zram.o
diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c
new file mode 100644
index 000000000000..a8427f75b6ea
--- /dev/null
+++ b/drivers/block/zram/zram_dedup.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2017 Joonsoo Kim.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/jhash.h>
+#include <linux/highmem.h>
+
+#include "zram_drv.h"
+
+/* One slot will contain 128 pages theoretically */
+#define ZRAM_HASH_SHIFT		7
+#define ZRAM_HASH_SIZE_MIN	(1U << 10)
+/* Unsigned on purpose: (1 << 31) does not fit in a signed int */
+#define ZRAM_HASH_SIZE_MAX	(1U << 31)
+
+/* Compressed bytes accounted to extra references on shared entries */
+u64 zram_dedup_dup_size(struct zram *zram)
+{
+	return (u64)atomic64_read(&zram->stats.dup_data_size);
+}
+
+/* Bytes accounted for zram_entry objects (see zram_entry_alloc/free) */
+u64 zram_dedup_meta_size(struct zram *zram)
+{
+	return (u64)atomic64_read(&zram->stats.meta_data_size);
+}
+
+/* Hash PAGE_SIZE bytes starting at @mem */
+static u32 zram_dedup_checksum(unsigned char *mem)
+{
+	return jhash(mem, PAGE_SIZE, 0);
+}
+
+/*
+ * Store @checksum in @new and link @new into the rb-tree of the hash slot
+ * selected by @checksum. An entry whose checksum equals that of an existing
+ * node is placed in that node's left subtree.
+ */
+void zram_dedup_insert(struct zram *zram, struct zram_entry *new,
+				u32 checksum)
+{
+	struct zram_hash *hash;
+	struct rb_root *rb_root;
+	struct rb_node **rb_node, *parent = NULL;
+	struct zram_entry *entry;
+
+	new->checksum = checksum;
+	hash = &zram->hash[checksum % zram->hash_size];
+	rb_root = &hash->rb_root;
+
+	spin_lock(&hash->lock);
+	rb_node = &rb_root->rb_node;
+	while (*rb_node) {
+		parent = *rb_node;
+		entry = rb_entry(parent, struct zram_entry, rb_node);
+		if (checksum > entry->checksum)
+			rb_node = &parent->rb_right;
+		else
+			rb_node = &parent->rb_left;
+	}
+
+	rb_link_node(&new->rb_node, parent, rb_node);
+	rb_insert_color(&new->rb_node, rb_root);
+	spin_unlock(&hash->lock);
+}
+
+/*
+ * Return true when the page at @mem has the same content as the data stored
+ * in @entry. Entries whose length is not PAGE_SIZE are decompressed into the
+ * compression stream buffer before the comparison.
+ */
+static bool zram_dedup_match(struct zram *zram, struct zram_entry *entry,
+				unsigned char *mem)
+{
+	bool match = false;
+	unsigned char *cmem;
+	struct zcomp_strm *zstrm;
+
+	cmem = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_RO);
+	if (entry->len == PAGE_SIZE) {
+		match = !memcmp(mem, cmem, PAGE_SIZE);
+	} else {
+		zstrm = zcomp_stream_get(zram->comp);
+		if (!zcomp_decompress(zstrm, cmem, entry->len, zstrm->buffer))
+			match = !memcmp(mem, zstrm->buffer, PAGE_SIZE);
+		zcomp_stream_put(zram->comp);
+	}
+	zs_unmap_object(zram->mem_pool, entry->handle);
+
+	return match;
+}
+
+/*
+ * Drop one reference on @entry and return the number of references left.
+ * Dropping the last reference unlinks the entry from its hash slot.
+ */
+static unsigned long zram_dedup_put(struct zram *zram,
+				struct zram_entry *entry)
+{
+	struct zram_hash *hash;
+	unsigned long refcount;
+
+	hash = &zram->hash[entry->checksum % zram->hash_size];
+
+	spin_lock(&hash->lock);
+	refcount = --entry->refcount;
+	if (refcount) {
+		atomic64_sub(entry->len, &zram->stats.dup_data_size);
+	} else if (!RB_EMPTY_NODE(&entry->rb_node)) {
+		/* Skip entries that zram_dedup_insert() never linked */
+		rb_erase(&entry->rb_node, &hash->rb_root);
+	}
+	spin_unlock(&hash->lock);
+
+	/*
+	 * Return the value sampled under the lock: once the lock is dropped
+	 * another holder may release and free @entry.
+	 */
+	return refcount;
+}
+
+/*
+ * Look up an entry whose checksum equals @checksum and take a reference on
+ * it. Only the first entry found with that checksum is compared against
+ * @mem; if the contents differ the reference is dropped again and NULL is
+ * returned.
+ */
+static struct zram_entry *zram_dedup_get(struct zram *zram,
+				unsigned char *mem, u32 checksum)
+{
+	struct zram_hash *hash;
+	struct zram_entry *entry;
+	struct rb_node *rb_node;
+
+	hash = &zram->hash[checksum % zram->hash_size];
+
+	spin_lock(&hash->lock);
+	rb_node = hash->rb_root.rb_node;
+	while (rb_node) {
+		entry = rb_entry(rb_node, struct zram_entry, rb_node);
+		if (checksum == entry->checksum) {
+			entry->refcount++;
+			atomic64_add(entry->len, &zram->stats.dup_data_size);
+			spin_unlock(&hash->lock);
+
+			if (zram_dedup_match(zram, entry, mem))
+				return entry;
+
+			zram_entry_free(zram, entry);
+
+			return NULL;
+		}
+
+		if (checksum < entry->checksum)
+			rb_node = rb_node->rb_left;
+		else
+			rb_node = rb_node->rb_right;
+	}
+	spin_unlock(&hash->lock);
+
+	return NULL;
+}
+
+/*
+ * Compute the checksum of @page into @checksum and return a referenced
+ * entry holding identical data, or NULL when no match is found.
+ */
+struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page,
+				u32 *checksum)
+{
+	void *mem;
+	struct zram_entry *entry;
+
+	mem = kmap_atomic(page);
+	*checksum = zram_dedup_checksum(mem);
+
+	entry = zram_dedup_get(zram, mem, *checksum);
+	kunmap_atomic(mem);
+
+	return entry;
+}
+
+/* Initialize a freshly allocated @entry that is not linked in any tree yet */
+void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry,
+				unsigned long handle, unsigned int len)
+{
+	entry->handle = handle;
+	entry->refcount = 1;
+	entry->len = len;
+	/* Lets zram_dedup_put() tell unlinked entries apart */
+	RB_CLEAR_NODE(&entry->rb_node);
+}
+
+/* Drop one reference; returns true when no reference is left on @entry */
+bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry)
+{
+	return !zram_dedup_put(zram, entry);
+}
+
+/*
+ * Allocate and initialize the hash table, sized from @num_pages.
+ * Returns 0 on success or -ENOMEM when the table cannot be allocated.
+ */
+int zram_dedup_init(struct zram *zram, size_t num_pages)
+{
+	size_t i;
+
+	zram->hash_size = clamp_t(size_t, num_pages >> ZRAM_HASH_SHIFT,
+				  ZRAM_HASH_SIZE_MIN, ZRAM_HASH_SIZE_MAX);
+	zram->hash = vzalloc(zram->hash_size * sizeof(*zram->hash));
+	if (!zram->hash) {
+		pr_err("Error allocating zram entry hash\n");
+		zram->hash_size = 0;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < zram->hash_size; i++) {
+		spin_lock_init(&zram->hash[i].lock);
+		zram->hash[i].rb_root = RB_ROOT;
+	}
+
+	return 0;
+}
+
+/* Release the hash table and reset the related fields of @zram */
+void zram_dedup_fini(struct zram *zram)
+{
+	vfree(zram->hash);
+	zram->hash = NULL;
+	zram->hash_size = 0;
+}
diff --git a/drivers/block/zram/zram_dedup.h b/drivers/block/zram/zram_dedup.h
new file mode 100644
index 000000000000..ebe6bff6c0da
--- /dev/null
+++ b/drivers/block/zram/zram_dedup.h
@@ -0,0 +1,25 @@
+#ifndef _ZRAM_DEDUP_H_
+#define _ZRAM_DEDUP_H_
+
+#include <linux/types.h>
+
+struct page;
+struct zram;
+struct zram_entry;
+
+u64 zram_dedup_dup_size(struct zram *zram);
+u64 zram_dedup_meta_size(struct zram *zram);
+
+void zram_dedup_insert(struct zram *zram, struct zram_entry *new,
+				u32 checksum);
+struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page,
+				u32 *checksum);
+
+void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry,
+				unsigned long handle, unsigned int len);
+bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry);
+
+int zram_dedup_init(struct zram *zram, size_t num_pages);
+void zram_dedup_fini(struct zram *zram);
+
+#endif /* _ZRAM_DEDUP_H_ */
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 43e3add13a12..4f2a711ded1f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -874,7 +874,7 @@ static ssize_t mm_stat_show(struct device *dev,
 	max_used = atomic_long_read(&zram->stats.max_used_pages);
 
 	ret = scnprintf(buf, PAGE_SIZE,
-			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
+			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu %8llu\n",
 			orig_size << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.compr_data_size),
 			mem_used << PAGE_SHIFT,
@@ -882,9 +882,11 @@ static ssize_t mm_stat_show(struct device *dev,
 			max_used << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.same_pages),
 			atomic_long_read(&pool_stats.pages_compacted),
-			(u64)atomic64_read(&zram->stats.huge_pages));
+			(u64)atomic64_read(&zram->stats.huge_pages),
+			zram_dedup_dup_size(zram),
+			zram_dedup_meta_size(zram));
 	up_read(&zram->init_lock);
 
 	return ret;
 }
 
@@ -914,26 +916,34 @@ static struct zram_entry *zram_entry_alloc(struct zram *zram,
 					unsigned int len, gfp_t flags)
 {
 	struct zram_entry *entry;
+	unsigned long handle;
 
 	entry = kzalloc(sizeof(*entry),
 			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
 	if (!entry)
 		return NULL;
 
-	entry->handle = zs_malloc(zram->mem_pool, len, flags);
-	if (!entry->handle) {
+	handle = zs_malloc(zram->mem_pool, len, flags);
+	if (!handle) {
 		kfree(entry);
 		return NULL;
 	}
 
+	zram_dedup_init_entry(zram, entry, handle, len);
+	atomic64_add(sizeof(*entry), &zram->stats.meta_data_size);
+
 	return entry;
 }
 
-static inline void zram_entry_free(struct zram *zram,
-					struct zram_entry *entry)
+void zram_entry_free(struct zram *zram, struct zram_entry *entry)
 {
+	if (!zram_dedup_put_entry(zram, entry))
+		return;
+
 	zs_free(zram->mem_pool, entry->handle);
 	kfree(entry);
+
+	atomic64_sub(sizeof(*entry), &zram->stats.meta_data_size);
 }
 
 static void zram_meta_free(struct zram *zram, u64 disksize)
@@ -946,6 +956,7 @@ static void zram_meta_free(struct zram *zram, u64 disksize)
 		zram_free_page(zram, index);
 
 	zs_destroy_pool(zram->mem_pool);
+	zram_dedup_fini(zram);
 	vfree(zram->table);
 }
 
@@ -966,6 +977,13 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 
 	if (!huge_class_size)
 		huge_class_size = zs_huge_class_size(zram->mem_pool);
+
+	if (zram_dedup_init(zram, num_pages)) {
+		vfree(zram->table);
+		zs_destroy_pool(zram->mem_pool);
+		return false;
+	}
+
 	return true;
 }
 
@@ -1125,6 +1143,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 	void *src, *dst, *mem;
 	struct zcomp_strm *zstrm;
 	struct page *page = bvec->bv_page;
+	u32 checksum;
 	unsigned long element = 0;
 	enum zram_pageflags flags = 0;
 	bool allow_wb = true;
@@ -1139,6 +1158,12 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 	}
 	kunmap_atomic(mem);
 
+	entry = zram_dedup_find(zram, page, &checksum);
+	if (entry) {
+		comp_len = entry->len;
+		goto out;
+	}
+
 compress_again:
 	zstrm = zcomp_stream_get(zram->comp);
 	src = kmap_atomic(page);
@@ -1220,6 +1245,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 	zcomp_stream_put(zram->comp);
 	zs_unmap_object(zram->mem_pool, entry->handle);
 	atomic64_add(comp_len, &zram->stats.compr_data_size);
+	zram_dedup_insert(zram, entry, checksum);
 out:
 	/*
 	 * Free memory associated with this sector
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index a90023138db5..2323355c6efc 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -18,8 +18,10 @@
 #include <linux/rwsem.h>
 #include <linux/zsmalloc.h>
 #include <linux/crypto.h>
+#include <linux/rbtree.h>
 
 #include "zcomp.h"
+#include "zram_dedup.h"
 
 #define SECTORS_PER_PAGE_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
 #define SECTORS_PER_PAGE	(1 << SECTORS_PER_PAGE_SHIFT)
@@ -55,6 +57,10 @@ enum zram_pageflags {
 /*-- Data structures */
 
 struct zram_entry {
+	struct rb_node rb_node;
+	u32 len;
+	u32 checksum;
+	unsigned long refcount;
 	unsigned long handle;
 };
 
@@ -83,6 +89,16 @@ struct zram_stats {
 	atomic64_t pages_stored;	/* no. of pages currently stored */
 	atomic_long_t max_used_pages;	/* no. of maximum pages stored */
 	atomic64_t writestall;		/* no. of write slow paths */
+	atomic64_t dup_data_size;	/*
+					 * compressed size of pages
+					 * duplicated
+					 */
+	atomic64_t meta_data_size;	/* size of zram_entries */
+};
+
+struct zram_hash {
+	spinlock_t lock;
+	struct rb_root rb_root;
 };
 
 struct zram {
@@ -90,6 +106,8 @@ struct zram {
 	struct zs_pool *mem_pool;
 	struct zcomp *comp;
 	struct gendisk *disk;
+	struct zram_hash *hash;
+	size_t hash_size;
 	/* Prevent concurrent execution of device init */
 	struct rw_semaphore init_lock;
 	/*
@@ -120,4 +138,6 @@ struct zram {
 	struct dentry *debugfs_dir;
 #endif
 };
+
+void zram_entry_free(struct zram *zram, struct zram_entry *entry);
 #endif