Merge branch 'era' of github.com:jthornber/thin-provisioning-tools into era

This commit is contained in:
Joe Thornber 2014-01-08 10:50:26 +00:00
commit be5fa59f90
20 changed files with 460 additions and 19 deletions

View File

@ -36,6 +36,7 @@ all: $(PROGRAMS)
SOURCE=\
base/base64.cc \
base/endian_utils.cc \
base/error_state.cc \
\
caching/hint_array.cc \
@ -46,8 +47,9 @@ SOURCE=\
caching/restore_emitter.cc \
caching/xml_format.cc \
\
era/era_detail.cc \
\
persistent-data/checksum.cc \
persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@ -55,6 +57,7 @@ SOURCE=\
persistent-data/transaction_manager.cc \
\
persistent-data/data-structures/bitset.cc \
persistent-data/data-structures/bloom_filter.cc \
persistent-data/data-structures/btree.cc \
\
persistent-data/space_map.cc \
@ -153,8 +156,9 @@ THIN_REPAIR_SOURCE=$(SOURCE)
THIN_RESTORE_SOURCE=$(SOURCE)
THIN_CHECK_SOURCE=\
base/error_state.cc \
base/endian_utils.cc \
\
persistent-data/checksum.cc \
persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@ -165,6 +169,7 @@ THIN_CHECK_SOURCE=\
persistent-data/space-maps/recursive.cc \
persistent-data/space-maps/careful_alloc.cc \
persistent-data/transaction_manager.cc \
\
thin-provisioning/device_tree.cc \
thin-provisioning/mapping_tree.cc \
thin-provisioning/metadata.cc \
@ -172,8 +177,9 @@ THIN_CHECK_SOURCE=\
thin-provisioning/superblock.cc
THIN_RMAP_SOURCE=\
base/endian_utils.cc \
\
persistent-data/checksum.cc \
persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@ -232,8 +238,9 @@ thin_metadata_size: thin-provisioning/thin_metadata_size.o
CACHE_CHECK_SOURCE=\
base/base64.cc \
base/error_state.cc \
base/endian_utils.cc \
\
persistent-data/checksum.cc \
persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@ -245,6 +252,7 @@ CACHE_CHECK_SOURCE=\
persistent-data/space-maps/recursive.cc \
persistent-data/space-maps/careful_alloc.cc \
persistent-data/transaction_manager.cc \
\
caching/hint_array.cc \
caching/superblock.cc \
caching/mapping_array.cc \

View File

@ -16,8 +16,8 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef ENDIAN_H
#define ENDIAN_H
#ifndef BASE_ENDIAN_H
#define BASE_ENDIAN_H
#include <endian.h>
#include <stdint.h>
@ -25,7 +25,6 @@
//----------------------------------------------------------------
// FIXME: rename to endian
namespace base {
// These are just little wrapper types to make the compiler

View File

@ -1,5 +1,5 @@
#include "base/endian_utils.h"
#include "caching/mapping_array.h"
#include "persistent-data/endian_utils.h"
#include <set>

View File

@ -1,10 +1,11 @@
#ifndef CACHE_METADATA_H
#define CACHE_METADATA_H
#include "base/endian_utils.h"
#include "persistent-data/block.h"
#include "persistent-data/data-structures/array.h"
#include "persistent-data/data-structures/bitset.h"
#include "persistent-data/endian_utils.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/transaction_manager.h"

View File

@ -1,7 +1,7 @@
#ifndef CACHE_SUPERBLOCK_H
#define CACHE_SUPERBLOCK_H
#include "persistent-data/endian_utils.h"
#include "base/endian_utils.h"
#include "persistent-data/data-structures/btree.h"
#include <set>

36
era/era_detail.cc Normal file
View File

@ -0,0 +1,36 @@
#include "era/era_detail.h"
#include <stdexcept>
using namespace base;
using namespace era;
//----------------------------------------------------------------
namespace {
// Placeholder for combining the two hash ids and the probe count into the
// single le32 stored in era_detail_disk::hash_fns_and_probes.  No encoding
// is implemented yet, so every call throws std::runtime_error.
le32 pack_hash_detail(uint32_t /* hash1 */, uint32_t /* hash2 */, uint32_t /* nr_probes */)
{
	throw std::runtime_error("not implemented");
}
void unpack_hash_detail(le32 packed, uint32_t &hash1, uint32_t &hash2, uint32_t &nr_probes) {
throw std::runtime_error("not implemented");
}
}
// Fills `value` from the on-disk record `disk`.
//
// nr_bits is converted first, then the packed hash/probe word is decoded,
// then bloom_root is converted.  The order matters for callers that inspect
// `value` after a failure: unpack_hash_detail() may throw, in which case
// only nr_bits has been written.
void era_detail_traits::unpack(disk_type const &disk, value_type &value)
{
	uint32_t const nr_bits = to_cpu<uint32_t>(disk.nr_bits);
	value.nr_bits = nr_bits;

	unpack_hash_detail(disk.hash_fns_and_probes,
			   value.hash1, value.hash2, value.nr_probes);

	uint64_t const root = to_cpu<uint64_t>(disk.bloom_root);
	value.bloom_root = root;
}
// Fills the on-disk record `disk` from `value`.
//
// nr_bits is converted first, then hash1/hash2/nr_probes are folded into
// hash_fns_and_probes, then bloom_root is converted.  pack_hash_detail()
// may throw, in which case only disk.nr_bits has been written.
void era_detail_traits::pack(value_type const &value, disk_type &disk)
{
	le32 const nr_bits = to_disk<le32>(value.nr_bits);
	disk.nr_bits = nr_bits;

	disk.hash_fns_and_probes =
		pack_hash_detail(value.hash1, value.hash2, value.nr_probes);

	le64 const root = to_disk<le64>(value.bloom_root);
	disk.bloom_root = root;
}
//----------------------------------------------------------------

36
era/era_detail.h Normal file
View File

@ -0,0 +1,36 @@
#ifndef ERA_DETAIL_H
#define ERA_DETAIL_H
#include "base/endian_utils.h"
//----------------------------------------------------------------
namespace era {
// Disk-side counterpart of era_detail (this is era_detail_traits::disk_type).
// Fields use the base::le32 / base::le64 wrapper types and the struct is
// declared packed, so the field order and widths below define the record
// layout -- do not reorder.
struct era_detail_disk {
base::le32 nr_bits;
// hash1, hash2 and nr_probes of era_detail share this single word; the
// exact bit layout is not defined yet (the pack/unpack helpers in
// era_detail.cc are unimplemented).
base::le32 hash_fns_and_probes;
base::le64 bloom_root;
} __attribute__ ((packed));
// In-core counterpart of era_detail_disk (this is
// era_detail_traits::value_type).  All fields are plain host integers.
struct era_detail {
uint32_t nr_bits;
// hash1, hash2 and nr_probes are stored together in
// era_detail_disk::hash_fns_and_probes on disk.
uint32_t hash1;
uint32_t hash2;
uint32_t nr_probes;
// presumably the root block of the bloom filter's bitset -- TODO confirm
uint64_t bloom_root;
};
// Traits bundle pairing the disk and in-core era_detail types with the
// conversions between them.
struct era_detail_traits {
typedef era_detail_disk disk_type;
typedef era_detail value_type;
// Converts `disk` into `value`.
static void unpack(disk_type const &disk, value_type &value);
// Converts `value` into `disk`.
static void pack(value_type const &value, disk_type &disk);
};
}
//----------------------------------------------------------------
#endif

View File

@ -19,7 +19,7 @@
#ifndef ARRAY_BLOCK_H
#define ARRAY_BLOCK_H
#include "persistent-data/endian_utils.h"
#include "base/endian_utils.h"
//----------------------------------------------------------------

View File

@ -46,6 +46,10 @@ namespace persistent_data {
return array_.get_root();
}
unsigned get_nr_bits() const {
return nr_bits_;
}
void grow(unsigned new_nr_bits, bool default_value) {
pad_last_block(default_value);
resize_array(new_nr_bits, default_value);
@ -184,7 +188,7 @@ namespace persistent_data {
if (n >= nr_bits_) {
std::ostringstream str;
str << "bitset index out of bounds ("
<< n << " >= " << nr_bits_ << endl;
<< n << " >= " << nr_bits_ << ")";
throw runtime_error(str.str());
}
}
@ -214,6 +218,12 @@ persistent_data::bitset::get_root() const
return impl_->get_root();
}
unsigned
bitset::get_nr_bits() const
{
return impl_->get_nr_bits();
}
void
persistent_data::bitset::grow(unsigned new_nr_bits, bool default_value)
{

View File

@ -16,8 +16,8 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef BITSET_H
#define BITSET_H
#ifndef PERSISTENT_DATA_DATA_STRUCTURES_BITSET_H
#define PERSISTENT_DATA_DATA_STRUCTURES_BITSET_H
#include "persistent-data/run.h"
@ -54,6 +54,7 @@ namespace persistent_data {
bitset(tm_ptr tm);
bitset(tm_ptr tm, block_address root, unsigned nr_bits);
block_address get_root() const;
unsigned get_nr_bits() const;
void grow(unsigned new_nr_bits, bool default_value);
void destroy();

View File

@ -0,0 +1,146 @@
#include "persistent-data/data-structures/bloom_filter.h"
#include <stdexcept>
using namespace persistent_data;
//----------------------------------------------------------------
namespace {
// Multiplier and right-shift used by hash1().  The literal needs all 64
// bits, so it carries the ULL suffix: with UL it does not fit unsigned long
// on targets where long is 32 bits wide.
static const uint64_t m1 = 0x9e37fffffffc0001ULL;
static const unsigned bits = 18;
// First probe hash: multiply the block address by m1, drop the low `bits`
// bits of the product and keep what fits in 32 bits.
static uint32_t hash1(block_address const &b)
{
	uint64_t const product = b * m1;
	return static_cast<uint32_t>(product >> bits);
}
// Second probe hash.  Only the low 32 bits of the block address take part;
// they are mixed with the same xor-shift/multiply sequence (16, 13, 16 and
// the two constants below) as MurmurHash3's 32-bit finaliser.
static uint32_t hash2(block_address const &b)
{
	uint32_t h = b;

	h ^= h >> 16;
	h *= 0x85ebca6bu;
	h ^= h >> 13;
	h *= 0xc2b2ae35u;
	h ^= h >> 16;

	return h;
}
// Throws std::runtime_error unless nr_bits is a power of two.
//
// Zero is rejected explicitly: it passes the (n & (n - 1)) test, but the
// filter derives its index mask as nr_bits - 1, which for zero would be all
// ones and let probes index past the end of the bitset.
void check_power_of_two(unsigned nr_bits)
{
	if (nr_bits == 0 || (nr_bits & (nr_bits - 1)))
		throw std::runtime_error("bloom filter needs a power of two nr_bits");
}
}
//----------------------------------------------------------------
// Creates a new, empty filter.
//
//   tm        - transaction manager handed to the underlying bitset
//   nr_bits   - size of the filter in bits; must be a power of two
//   nr_probes - number of bit positions tested/set per block
//
// Throws std::runtime_error (from check_power_of_two) when nr_bits is not a
// power of two.  The bitset is grown to nr_bits with every bit cleared.
bloom_filter::bloom_filter(tm_ptr tm,
			   unsigned nr_bits, unsigned nr_probes)
	: tm_(tm),
	  nr_bits_(nr_bits),	// previously left uninitialised
	  bits_(tm),
	  nr_probes_(nr_probes),
	  mask_(nr_bits - 1)
{
	check_power_of_two(nr_bits);
	bits_.grow(nr_bits, false);
}
// Opens an existing filter.
//
//   tm        - transaction manager handed to the underlying bitset
//   root      - root block of the bitset, as returned earlier by get_root()
//   nr_bits   - size of the filter in bits; must be a power of two
//   nr_probes - number of bit positions tested/set per block
//
// Throws std::runtime_error (from check_power_of_two) when nr_bits is not a
// power of two.
bloom_filter::bloom_filter(tm_ptr tm, block_address root,
			   unsigned nr_bits, unsigned nr_probes)
	: tm_(tm),
	  nr_bits_(nr_bits),	// previously left uninitialised
	  bits_(tm, root, nr_bits),
	  nr_probes_(nr_probes),
	  mask_(nr_bits - 1)
{
	check_power_of_two(nr_bits);
}
// Root block of the underlying bitset.  Pass it to the (tm, root, nr_bits,
// nr_probes) constructor to reopen the filter.
block_address bloom_filter::get_root() const
{
	block_address const root = bits_.get_root();
	return root;
}
// Returns true when every probe position for `b` is set, i.e. `b` may have
// been added; returns false as soon as one probe bit is found clear.
bool bloom_filter::test(uint64_t b)
{
	vector<unsigned> probes(nr_probes_);
	fill_probes(b, probes);

	// advance past the leading run of set probe bits
	unsigned p = 0;
	while (p < nr_probes_ && bits_.get(probes[p]))
		++p;

	return p == nr_probes_;
}
void
bloom_filter::set(uint64_t b)
{
vector<unsigned> probes(nr_probes_);
fill_probes(b, probes);
for (unsigned p = 0; p < nr_probes_; p++)
bits_.set(probes[p], true);
}
void
bloom_filter::flush()
{
bits_.flush();
}
// Computes the nr_probes_ bit positions for block `b` into `probes`, which
// the caller must have sized to at least nr_probes_ entries.
//
// The first probe is hash1(b); each later probe adds the running second
// hash, which itself grows by the probe index each round.  All values are
// reduced with mask_ (nr_bits - 1) so they stay inside the bitset.
void
bloom_filter::fill_probes(block_address b, vector<unsigned> &probes) const
{
	// A filter with no probes has nothing to fill; writing probes[0]
	// here would run off the end of an empty vector.
	if (!nr_probes_)
		return;

	uint32_t h1 = hash1(b) & mask_;
	uint32_t h2 = hash2(b) & mask_;

	probes[0] = h1;
	for (unsigned p = 1; p < nr_probes_; p++) {
		h1 = (h1 + h2) & mask_;
		h2 = (h2 + p) & mask_;
		probes[p] = h1;
	}
}
// Writes the residency line followed by a histogram of run lengths: one
// "<length>: <count>" line for each distinct length of consecutive
// equal-valued bits, in increasing length order.
void bloom_filter::print_debug(ostream &out)
{
	print_residency(out);

	// run length -> number of runs of that length
	map<unsigned, unsigned> histogram;

	unsigned i = 0;
	while (i < bits_.get_nr_bits()) {
		bool const value = bits_.get(i);
		unsigned len = 1;

		// extend the run while the following bits carry the same value
		for (++i; i < bits_.get_nr_bits() && bits_.get(i) == value; ++i)
			len++;

		// operator[] starts a missing entry at zero
		histogram[len]++;
	}

	for (map<unsigned, unsigned>::const_iterator run = histogram.begin();
	     run != histogram.end(); ++run)
		out << run->first << ": " << run->second << endl;
}
// Writes "residency: <set bits>/<total bits>" to `out`.
void bloom_filter::print_residency(ostream &out)
{
	unsigned set_bits = 0;

	unsigned i = 0;
	while (i < bits_.get_nr_bits()) {
		if (bits_.get(i))
			++set_bits;
		++i;
	}

	out << "residency: " << set_bits << "/" << bits_.get_nr_bits() << endl;
}
//----------------------------------------------------------------

View File

@ -0,0 +1,47 @@
#ifndef PERSISTENT_DATA_DATA_STRUCTURES_BLOOM_FILTER_H
#define PERSISTENT_DATA_DATA_STRUCTURES_BLOOM_FILTER_H
#include "persistent-data/transaction_manager.h"
#include "persistent-data/data-structures/bitset.h"
#include <boost/shared_ptr.hpp>
//----------------------------------------------------------------
namespace persistent_data {
class bloom_filter {
public:
typedef boost::shared_ptr<bloom_filter> ptr;
typedef typename persistent_data::transaction_manager::ptr tm_ptr;
// nr_bits must be a power of two
bloom_filter(tm_ptr tm,
unsigned nr_bits, unsigned nr_probes);
bloom_filter(tm_ptr tm, block_address root,
unsigned nr_bits_power, unsigned nr_probes);
block_address get_root() const;
bool test(uint64_t b); // not const due to caching effects in bitset
void set(uint64_t b);
void flush();
void print_debug(ostream &out);
private:
void print_residency(ostream &out);
void fill_probes(block_address b, vector<unsigned> &probes) const;
tm_ptr tm_;
unsigned nr_bits_;
persistent_data::bitset bits_;
unsigned nr_probes_;
uint64_t mask_;
};
}
//----------------------------------------------------------------
#endif

View File

@ -19,7 +19,7 @@
#ifndef BTREE_H
#define BTREE_H
#include "persistent-data/endian_utils.h"
#include "base/endian_utils.h"
#include "persistent-data/transaction_manager.h"
#include "persistent-data/data-structures/ref_counter.h"

View File

@ -16,6 +16,8 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "base/endian_utils.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/space-maps/disk_structures.h"
#include "persistent-data/space-maps/recursive.h"
@ -23,7 +25,6 @@
#include "persistent-data/data-structures/btree_damage_visitor.h"
#include "persistent-data/checksum.h"
#include "persistent-data/endian_utils.h"
#include "persistent-data/math_utils.h"
#include "persistent-data/transaction_manager.h"

View File

@ -19,7 +19,7 @@
#ifndef SPACE_MAP_DISK_STRUCTURES_H
#define SPACE_MAP_DISK_STRUCTURES_H
#include "persistent-data/endian_utils.h"
#include "base/endian_utils.h"
// FIXME: what's this included for?
#include "persistent-data/data-structures/btree.h"

View File

@ -19,9 +19,10 @@
#ifndef METADATA_LL_H
#define METADATA_LL_H
#include "base/endian_utils.h"
#include "persistent-data/block.h"
#include "persistent-data/data-structures/btree.h"
#include "persistent-data/endian_utils.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/transaction_manager.h"

View File

@ -1,8 +1,9 @@
#ifndef THIN_SUPERBLOCK_H
#define THIN_SUPERBLOCK_H
#include "base/endian_utils.h"
#include "persistent-data/block.h"
#include "persistent-data/endian_utils.h"
#include "persistent-data/data-structures/ref_counter.h"
//----------------------------------------------------------------

View File

@ -50,6 +50,7 @@ TEST_SOURCE=\
unit-tests/base64_t.cc \
unit-tests/bitset_t.cc \
unit-tests/block_t.cc \
unit-tests/bloom_filter_t.cc \
unit-tests/btree_t.cc \
unit-tests/btree_counter_t.cc \
unit-tests/btree_damage_visitor_t.cc \

View File

@ -0,0 +1,153 @@
#include "gmock/gmock.h"
#include "persistent-data/data-structures/bloom_filter.h"
#include "persistent-data/transaction_manager.h"
#include "persistent-data/space-maps/core.h"
#include "persistent-data/data-structures/array_block.h"
#include "test_utils.h"
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include <utility>
#include <deque>
#include <vector>
#include <set>
using namespace persistent_data;
using namespace std;
using namespace test;
using namespace testing;
//----------------------------------------------------------------
namespace {
// Geometry of the scratch metadata device used by the fixture.
const block_address BLOCK_SIZE = 4096;		// template argument to create_bm<>
const block_address NR_BLOCKS = 102400;		// blocks in the block manager and space map
const block_address SUPERBLOCK = 0;		// block locked by commit()
//--------------------------------
// Fixture that gives each test its own block manager, a core_map space map
// with the superblock already referenced, and a transaction manager over
// the two.
class BloomFilterTests : public Test {
public:
	BloomFilterTests()
		: bm_(create_bm<BLOCK_SIZE>(NR_BLOCKS)),
		  sm_(setup_core_map()),
		  tm_(new transaction_manager(bm_, sm_))
	{
	}

	// Returns `count` distinct block addresses drawn uniformly from
	// [0, max].  The generator is default-seeded, so every call yields
	// the same sequence.
	set<block_address> generate_random_blocks(unsigned count,
						  block_address max = std::numeric_limits<uint64_t>::max())
	{
		boost::random::mt19937 rng;
		boost::random::uniform_int_distribution<uint64_t> dist(0, max);

		set<block_address> blocks;
		for (; blocks.size() < count; ) {
			block_address const candidate = dist(rng);
			blocks.insert(candidate);
		}

		return blocks;
	}

	// Takes and immediately drops a superblock write reference.
	void commit()
	{
		block_manager<>::write_ref superblock(bm_->superblock(SUPERBLOCK));
	}

	// Builds the space map and bumps the superblock's reference count.
	space_map::ptr setup_core_map()
	{
		space_map::ptr core(new core_map(NR_BLOCKS));
		core->inc(SUPERBLOCK);
		return core;
	}

	// Declaration order is the initialisation order used above.
	with_temp_directory dir_;
	block_manager<>::ptr bm_;
	space_map::ptr sm_;
	transaction_manager::ptr tm_;
};
}
//----------------------------------------------------------------
// Constructing a filter with a size that is not a power of two must throw.
TEST_F(BloomFilterTests, nr_bits_must_be_a_power_of_two)
{
	unsigned const not_a_power_of_two = 1023;
	unsigned const nr_probes = 3;

	ASSERT_THROW(bloom_filter f(tm_, not_a_power_of_two, nr_probes), runtime_error);
}
// Construction with a power-of-two size completes without throwing.
TEST_F(BloomFilterTests, can_create_a_bloom_filter)
{
	unsigned const nr_bits = 1024;
	unsigned const nr_probes = 3;

	bloom_filter f(tm_, nr_bits, nr_probes);
}
// Every block that was set must subsequently test positive.
TEST_F(BloomFilterTests, no_false_negatives)
{
	typedef set<block_address>::const_iterator block_it;

	bloom_filter f(tm_, 4096, 6);
	set<block_address> const blocks = generate_random_blocks(1000);

	for (block_it b = blocks.begin(); b != blocks.end(); ++b)
		f.set(*b);

	for (block_it b = blocks.begin(); b != blocks.end(); ++b)
		ASSERT_THAT(f.test(*b), Eq(true));
}
// Blocks set through one filter instance must still test positive after the
// filter is flushed and reopened from its root.
TEST_F(BloomFilterTests, reload_works)
{
	typedef set<block_address>::const_iterator block_it;

	block_address root;
	set<block_address> const blocks = generate_random_blocks(1000);

	{
		// populate, flush and remember where the bitset lives
		bloom_filter writer(tm_, 4096, 6);

		for (block_it b = blocks.begin(); b != blocks.end(); ++b)
			writer.set(*b);

		writer.flush();
		root = writer.get_root();
		commit();
	}

	{
		// reopen with the same geometry and check nothing was lost
		bloom_filter reader(tm_, root, 4096, 6);

		for (block_it b = blocks.begin(); b != blocks.end(); ++b)
			ASSERT_THAT(reader.test(*b), Eq(true));
	}
}
// Reports (on stderr) how many unwritten blocks the filter claims to
// contain.  Purely informational: nothing is asserted.
TEST_F(BloomFilterTests, count_false_positives)
{
	block_address const nr_blocks = 1024 * 1024;
	block_address const written_blocks = nr_blocks / 100;

	// smallest power of two holding at least 16 bits per written block
	unsigned shift = 1;
	for (; (1ull << shift) < (16 * written_blocks); shift++)
		;

	cerr << "bitset size: " << ((1 << shift) / (8 * 1024)) << "k" << endl;

	bloom_filter f(tm_, 1 << shift, 6);

	set<block_address> const written = generate_random_blocks(written_blocks, nr_blocks);
	for (set<block_address>::const_iterator b = written.begin(); b != written.end(); ++b)
		f.set(*b);

	// f.print_debug(cerr);

	unsigned false_positives = 0;
	for (unsigned b = 0; b < nr_blocks; b++) {
		if (written.count(b))
			continue;	// genuinely present, not a false positive

		if (f.test(b))
			false_positives++;
	}

	cerr << false_positives << " false positives out of " << nr_blocks << ", "
	     << static_cast<double>(false_positives * 100) / static_cast<double>(nr_blocks)
	     << "%" << endl;
}
//----------------------------------------------------------------