Move source into separate sub directories.

This commit is contained in:
Joe Thornber
2013-01-02 12:55:41 +00:00
parent c0d2eb9bc6
commit 282e98a6b1
55 changed files with 85 additions and 336 deletions

265
persistent-data/block.h Normal file
View File

@@ -0,0 +1,265 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef BLOCK_H
#define BLOCK_H
#include "persistent-data/cache.h"

#include <malloc.h>
#include <stdint.h>
#include <string.h>

#include <map>
#include <memory>
#include <new>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/noncopyable.hpp>
#include <boost/optional.hpp>
#include <boost/shared_ptr.hpp>
//----------------------------------------------------------------
namespace persistent_data {
// Size in bytes of a metadata block. Used as the default BlockSize template
// argument for buffer, block_io and block_manager below.
uint32_t const MD_BLOCK_SIZE = 4096;
// Index of a block on the device, counted in whole blocks rather than bytes
// (block_io multiplies it by BlockSize to obtain a byte offset).
typedef uint64_t block_address;
// A fixed-size, non-copyable array of BlockSize bytes.
//
// Heap instances are allocated through the class-specific operator new, which
// aligns the storage to Alignment bytes (presumably so the buffer can be
// handed to the O_DIRECT file descriptor that block_io opens).
template <uint32_t BlockSize = MD_BLOCK_SIZE, uint32_t Alignment = 512>
class buffer : private boost::noncopyable {
public:
	// Bounds-checked element access.
	// Throws std::runtime_error if index >= BlockSize.
	unsigned char &operator[](unsigned index) {
		if (index >= BlockSize)
			throw std::runtime_error("buffer index out of bounds");

		return data_[index];
	}

	// Const overload of the bounds-checked element access.
	unsigned char const &operator[](unsigned index) const {
		if (index >= BlockSize)
			throw std::runtime_error("buffer index out of bounds");

		return data_[index];
	}

	// Unchecked pointer to the first byte of the buffer.
	unsigned char *raw() {
		return data_;
	}

	unsigned char const *raw() const {
		return data_;
	}

	// Allocates Alignment-aligned storage.  A throwing operator new must
	// never return a null pointer (the constructor would run on it), so an
	// allocation failure is reported as std::bad_alloc.
	static void *operator new(size_t s) {
		void *p = ::memalign(Alignment, s);
		if (!p)
			throw std::bad_alloc();

		return p;
	}

	// Releases storage obtained from the operator new above.
	static void operator delete(void *p) {
		free(p);
	}

private:
	unsigned char data_[BlockSize];
};
// Thin wrapper around a file descriptor that reads and writes whole blocks of
// BlockSize bytes.  Non-copyable: the descriptor is closed by the destructor.
template <uint32_t BlockSize = MD_BLOCK_SIZE>
class block_io : private boost::noncopyable {
public:
typedef boost::shared_ptr<block_io> ptr;
// Opens |path|; read-write if |writeable|, read-only otherwise.
// Throws std::runtime_error if the file cannot be opened.
// |nr_blocks| is only recorded and reported back by get_nr_blocks().
block_io(std::string const &path, block_address nr_blocks, bool writeable = false);
// Closes the file descriptor.
~block_io();
// Returns the block count that was passed to the constructor.
block_address get_nr_blocks() const {
return nr_blocks_;
}
// Reads the block at index |location| into |buf|.
void read_buffer(block_address location, buffer<BlockSize> &buf) const;
// Writes |buf| to the block at index |location|.
void write_buffer(block_address location, buffer<BlockSize> const &buf);
private:
int fd_;
block_address nr_blocks_;
bool writeable_;
};
// Hands out reference-counted read/write references to blocks of a file,
// keeping recently used blocks in a cache.  Blocks obtained through a
// write reference are marked dirty and written back by flush().
template <uint32_t BlockSize = MD_BLOCK_SIZE>
class block_manager : private boost::noncopyable {
public:
typedef boost::shared_ptr<block_manager> ptr;
// Opens |path| through a block_io.  The cache is sized to at least 64
// blocks, or |max_concurrent_locks| if that is larger.
block_manager(std::string const &path,
block_address nr_blocks,
unsigned max_concurrent_locks,
bool writeable = false);
// Interface for per-block integrity handling.
//  - check() is called after a block has been read from disk.
//  - prepare() is called just before a dirty block is written.
class validator {
public:
typedef boost::shared_ptr<validator> ptr;
virtual ~validator() {}
virtual void check(buffer<BlockSize> const &b, block_address location) const = 0;
virtual void prepare(buffer<BlockSize> &b, block_address location) const = 0;
};
// Validator that accepts every block and leaves the data untouched.
class noop_validator : public validator {
public:
void check(buffer<BlockSize> const &b, block_address location) const {}
void prepare(buffer<BlockSize> &b, block_address location) const {}
};
// BT_SUPERBLOCK blocks trigger a flush of the cache when their last
// reference is dropped (see read_ref's destructor).
enum block_type {
BT_SUPERBLOCK,
BT_NORMAL
};
// An in-memory copy of one on-disk block plus its bookkeeping.
struct block : private boost::noncopyable {
typedef boost::shared_ptr<block> ptr;
// If |zero| is true the data is zero-filled and marked dirty; otherwise
// it is read from |io| and passed to |v|->check().
block(typename block_io<BlockSize>::ptr io,
block_address location,
block_type bt,
typename validator::ptr v,
bool zero = false);
// Flushes the block if it is dirty.
~block();
void check_read_lockable() const {
// FIXME: finish
}
void check_write_lockable() const {
// FIXME: finish
}
// Runs the validator's prepare() and writes the data out if dirty.
void flush();
typename block_io<BlockSize>::ptr io_;
block_address location_;
std::auto_ptr<buffer<BlockSize> > data_;
typename validator::ptr validator_;
block_type bt_;
bool dirty_;
};
typedef typename block::ptr block_ptr; // FIXME: remove
// Shared, counted handle on a block.  Copies share one holder count; when
// the last copy is destroyed the block is handed back to the cache.
class read_ref {
public:
read_ref(block_manager<BlockSize> const &bm,
block_ptr b);
read_ref(read_ref const &rhs);
virtual ~read_ref();
read_ref const &operator =(read_ref const &rhs);
// Index of the referenced block.
block_address get_location() const;
// Read-only view of the block's data.
buffer<BlockSize> const &data() const;
protected:
block_manager<BlockSize> const &bm_;
block_ptr block_;
// Heap-allocated count of read_ref copies sharing block_.
unsigned *holders_;
};
// Inherited from read_ref, since you can read a block that's write
// locked.
class write_ref : public read_ref {
public:
// Marks the block dirty.
write_ref(block_manager<BlockSize> const &bm,
typename block::ptr b);
using read_ref::data;
// Mutable view of the block's data.
buffer<BlockSize> &data();
};
// Locking methods
// Each of these throws std::runtime_error if |location| is not below
// get_nr_blocks().  The validator defaults to a noop_validator.
read_ref
read_lock(block_address location,
typename validator::ptr v =
typename validator::ptr(new noop_validator())) const;
write_ref
write_lock(block_address location,
typename validator::ptr v =
typename validator::ptr(new noop_validator()));
// As write_lock(), but the block's data is zero-filled instead of being
// read from disk.
write_ref
write_lock_zero(block_address location,
typename validator::ptr v =
typename validator::ptr(new noop_validator()));
// The super block is the one that should be written last.
// Unlocking this block triggers the following events:
//
// i) synchronous write of all dirty blocks _except_ the
// superblock.
//
// ii) synchronous write of superblock
//
// If any locks are held at the time of the superblock
// being unlocked then an exception will be thrown.
write_ref superblock(block_address b,
typename validator::ptr v =
typename validator::ptr(new noop_validator()));
write_ref superblock_zero(block_address b,
typename validator::ptr v =
typename validator::ptr(new noop_validator()));
// Number of blocks reported by the underlying block_io.
block_address get_nr_blocks() const;
// Flushes every block in the cache that is not currently held.
void flush() const;
private:
// Throws std::runtime_error if |b| is out of range.
void check(block_address b) const;
// Flushes a single block; used as the callback for flush().
void write_block(block_ptr b) const;
enum lock_type {
READ_LOCK,
WRITE_LOCK
};
// Tells base::cache how to key cached blocks: by their location.
struct cache_traits {
typedef typename block::ptr value_type;
typedef block_address key_type;
static key_type get_key(value_type const &v) {
return v->location_;
}
};
typename block_io<BlockSize>::ptr io_;
mutable base::cache<cache_traits> cache_;
// FIXME: we need a dirty list as well as a cache
typedef std::map<block_address, std::pair<lock_type, unsigned> > held_map;
mutable held_map held_locks_;
};
// A little utility to help build validators
// Wraps a raw validator pointer in the shared pointer type that the
// block_manager locking methods expect.  Ownership of |v| passes to the
// returned pointer.
inline block_manager<>::validator::ptr
mk_validator(block_manager<>::validator *v)
{
	typedef block_manager<>::validator::ptr validator_ptr;

	validator_ptr owned(v);
	return owned;
}
}
#include "block.tcc"
//----------------------------------------------------------------
#endif

372
persistent-data/block.tcc Normal file
View File

@@ -0,0 +1,372 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "block.h"
#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <boost/bind.hpp>
#include <stdexcept>
#include <sstream>
using namespace boost;
using namespace persistent_data;
using namespace std;
//----------------------------------------------------------------
// Opens |path| with O_DIRECT | O_SYNC, read-write when |writeable| is set and
// read-only otherwise.  Throws std::runtime_error if open() fails.
template <uint32_t BlockSize>
block_io<BlockSize>::block_io(std::string const &path, block_address nr_blocks, bool writeable)
	: nr_blocks_(nr_blocks),
	  writeable_(writeable)
{
	int const access_mode = writeable ? O_RDWR : O_RDONLY;
	int const open_flags = O_DIRECT | O_SYNC | access_mode;

	fd_ = ::open(path.c_str(), open_flags, 0666);
	if (fd_ < 0)
		throw std::runtime_error("couldn't open file");
}
// Releases the file descriptor opened by the constructor.  The result of
// close() is not inspected.
template <uint32_t BlockSize>
block_io<BlockSize>::~block_io()
{
	::close(this->fd_);
}
// Reads the block at index |location| into |buffer|.
//
// The read is repeated until BlockSize bytes have arrived.  A read that is
// interrupted (errno == EINTR) or would block (errno == EAGAIN) is retried.
//
// Throws std::runtime_error if the seek fails, if read() reports any other
// error, or if end-of-file is reached before a whole block has been read (the
// buffer would otherwise be returned only partly filled).
template <uint32_t BlockSize>
void
block_io<BlockSize>::read_buffer(block_address location, buffer<BlockSize> &buffer) const
{
	off_t r = ::lseek(fd_, BlockSize * location, SEEK_SET);
	if (r == (off_t) -1)
		throw std::runtime_error("lseek failed");

	size_t remaining = BlockSize;
	unsigned char *buf = buffer.raw();

	while (remaining) {
		ssize_t n = ::read(fd_, buf, remaining);

		if (n > 0) {
			remaining -= n;
			buf += n;

		} else if (n == 0) {
			// End of file before the block was complete.
			throw std::runtime_error("short read");

		} else if (errno != EINTR && errno != EAGAIN) {
			// read() signals failure through errno, not its return value.
			throw std::runtime_error("read failed");
		}
	}
}
// Writes |buffer| to the block at index |location|.
//
// The write is repeated until BlockSize bytes have been written.  A write
// that is interrupted (errno == EINTR) or would block (errno == EAGAIN) is
// retried.
//
// Throws std::runtime_error if the seek fails, if write() reports any other
// error, or if write() makes no progress.
template <uint32_t BlockSize>
void
block_io<BlockSize>::write_buffer(block_address location, buffer<BlockSize> const &buffer)
{
	off_t r = ::lseek(fd_, BlockSize * location, SEEK_SET);
	if (r == (off_t) -1)
		throw std::runtime_error("lseek failed");

	size_t remaining = BlockSize;
	unsigned char const *buf = buffer.raw();

	while (remaining) {
		ssize_t n = ::write(fd_, buf, remaining);

		if (n > 0) {
			remaining -= n;
			buf += n;
			continue;
		}

		// write() signals failure through errno, not its return value.
		if (n < 0 && (errno == EINTR || errno == EAGAIN))
			continue;

		// Capture errno before the stream operations can disturb it.
		int const err = errno;

		std::ostringstream out;
		out << "write failed to block " << location
		    << ", block size = " << BlockSize
		    << ", remaining = " << remaining
		    << ", n = " << n
		    << ", errno = " << err
		    << ", fd_ = " << fd_
		    << std::endl;
		throw std::runtime_error(out.str());
	}
}
//----------------------------------------------------------------
// Builds the in-memory copy of the block at |location|.
//
// With |zero| set, the data is zero-filled and the block starts out dirty;
// nothing is read and the validator is not consulted.  Otherwise the data is
// read through |io| and handed to |v|->check().
template <uint32_t BlockSize>
block_manager<BlockSize>::block::block(typename block_io<BlockSize>::ptr io,
				       block_address location,
				       block_type bt,
				       typename validator::ptr v,
				       bool zero)
	: io_(io),
	  location_(location),
	  data_(new buffer<BlockSize>()),
	  validator_(v),
	  bt_(bt),
	  dirty_(false)
{
	if (!zero) {
		io_->read_buffer(location_, *data_);
		validator_->check(*data_, location_);
		return;
	}

	memset(data_->raw(), 0, BlockSize);
	dirty_ = true;
}
// Gives the block a final chance to reach the disk: a dirty block is written
// out via flush() before it is destroyed.
template <uint32_t BlockSize>
block_manager<BlockSize>::block::~block()
{
	this->flush();
}
// Writes the block out if it has been modified.
//
// The validator's prepare() runs first, then the data is written through io_.
// Once the write has succeeded the block is marked clean, so later flushes
// (including the one in the destructor, and the second flush performed when a
// superblock is released) do not write the same data again.  A new write_ref
// marks the block dirty again.
template <uint32_t BlockSize>
void
block_manager<BlockSize>::block::flush()
{
	if (!dirty_)
		return;

	validator_->prepare(*data_, location_);
	io_->write_buffer(location_, *data_);
	dirty_ = false;
}
//----------------------------------------------------------------
// Creates the first reference to |b|.  A fresh holder count is allocated and
// starts at one; copies of this reference share it.
template <uint32_t BlockSize>
block_manager<BlockSize>::read_ref::read_ref(block_manager<BlockSize> const &bm,
					     block_ptr b)
	: bm_(bm),
	  block_(b),
	  holders_(new unsigned(1))
{
}
// Copy constructor: the copy refers to the same block and shares the holder
// count with |rhs|, which is bumped by one.
template <uint32_t BlockSize>
block_manager<BlockSize>::read_ref::read_ref(read_ref const &rhs)
	: bm_(rhs.bm_),
	  block_(rhs.block_),
	  holders_(rhs.holders_)
{
	++*holders_;
}
// Drops one holder.  When the last holder goes away the block is handed back
// to the manager's cache and the shared counter is freed.  For a superblock
// the cache is flushed both before and after the block is handed back.
template <uint32_t BlockSize>
block_manager<BlockSize>::read_ref::~read_ref()
{
	if (--(*holders_) != 0)
		return;

	bool const is_superblock = (block_->bt_ == BT_SUPERBLOCK);

	if (is_superblock)
		bm_.flush();

	bm_.cache_.put(block_);

	if (is_superblock)
		bm_.flush();

	delete holders_;
}
// Makes this reference share |rhs|'s block and holder count.
//
// The hold on the new block is taken first, then the hold this reference had
// on its previous block is released exactly as the destructor would release
// it (returning the block to the cache when this was the last holder).
//
// bm_ is a reference member and cannot be reseated, so it is left alone; both
// references are assumed to belong to the same block_manager.
//
// Returns *this.
template <uint32_t BlockSize>
typename block_manager<BlockSize>::read_ref const &
block_manager<BlockSize>::read_ref::operator =(read_ref const &rhs)
{
	if (this != &rhs) {
		// Acquire before release so that assigning between two copies
		// that already share a counter can never drop it to zero.
		++(*rhs.holders_);

		if (!--(*holders_)) {
			if (block_->bt_ == BT_SUPERBLOCK) {
				bm_.flush();
				bm_.cache_.put(block_);
				bm_.flush();
			} else
				bm_.cache_.put(block_);

			delete holders_;
		}

		block_ = rhs.block_;
		holders_ = rhs.holders_;
	}

	return *this;
}
// Reports the index of the block this reference points at.
template <uint32_t BlockSize>
block_address
block_manager<BlockSize>::read_ref::get_location() const
{
	block_address const where = block_->location_;
	return where;
}
// Gives read-only access to the bytes of the referenced block.
template <uint32_t BlockSize>
buffer<BlockSize> const &
block_manager<BlockSize>::read_ref::data() const
{
	buffer<BlockSize> const &contents = *block_->data_;
	return contents;
}
//--------------------------------
// Creates a writable reference to |b|.  The block is marked dirty
// immediately, whether or not the caller goes on to modify it.
template <uint32_t BlockSize>
block_manager<BlockSize>::write_ref::write_ref(block_manager<BlockSize> const &bm,
					       block_ptr b)
	: read_ref(bm, b)
{
	read_ref::block_->dirty_ = true;
}
// Gives mutable access to the bytes of the referenced block.
template <uint32_t BlockSize>
buffer<BlockSize> &
block_manager<BlockSize>::write_ref::data()
{
	buffer<BlockSize> &contents = *read_ref::block_->data_;
	return contents;
}
//----------------------------------------------------------------
// Opens |path| through a new block_io and sizes the block cache.  The cache
// holds |max_concurrent_blocks| entries, but never fewer than 64.
template <uint32_t BlockSize>
block_manager<BlockSize>::block_manager(std::string const &path,
					block_address nr_blocks,
					unsigned max_concurrent_blocks,
					bool writeable)
	: io_(new block_io<BlockSize>(path, nr_blocks, writeable)),
	  cache_(max_concurrent_blocks > 64u ? max_concurrent_blocks : 64u)
{
}
// Returns a read reference to the block at |location|.
//
// A block already in the cache is reused as it is (|v| is not applied to
// it); otherwise the block is read from disk, validated with |v| and added to
// the cache.  Throws std::runtime_error if |location| is out of range.
template <uint32_t BlockSize>
typename block_manager<BlockSize>::read_ref
block_manager<BlockSize>::read_lock(block_address location,
				    typename block_manager<BlockSize>::validator::ptr v) const
{
	check(location);

	boost::optional<block_ptr> hit = cache_.get(location);
	if (!hit) {
		block_ptr fresh(new block(io_, location, BT_NORMAL, v));
		cache_.insert(fresh);
		return read_ref(*this, fresh);
	}

	(*hit)->check_read_lockable();
	return read_ref(*this, *hit);
}
// Returns a write reference to the block at |location|.
//
// A block already in the cache is reused as it is (|v| is not applied to
// it); otherwise the block is read from disk, validated with |v| and added to
// the cache.  Throws std::runtime_error if |location| is out of range.
template <uint32_t BlockSize>
typename block_manager<BlockSize>::write_ref
block_manager<BlockSize>::write_lock(block_address location,
				     typename block_manager<BlockSize>::validator::ptr v)
{
	check(location);

	boost::optional<block_ptr> hit = cache_.get(location);
	if (!hit) {
		block_ptr fresh(new block(io_, location, BT_NORMAL, v));
		cache_.insert(fresh);
		return write_ref(*this, fresh);
	}

	(*hit)->check_write_lockable();
	return write_ref(*this, *hit);
}
// Returns a write reference to the block at |location| with its data zeroed.
//
// Nothing is read from disk: a cached block has its buffer cleared in place,
// and an uncached block is created zero-filled and added to the cache.
// Throws std::runtime_error if |location| is out of range.
template <uint32_t BlockSize>
typename block_manager<BlockSize>::write_ref
block_manager<BlockSize>::write_lock_zero(block_address location,
					  typename block_manager<BlockSize>::validator::ptr v)
{
	check(location);

	boost::optional<block_ptr> hit = cache_.get(location);
	if (!hit) {
		block_ptr fresh(new block(io_, location, BT_NORMAL, v, true));
		cache_.insert(fresh);
		return write_ref(*this, fresh);
	}

	block_ptr const &existing = *hit;
	existing->check_write_lockable();
	memset(existing->data_->raw(), 0, BlockSize);
	return write_ref(*this, existing);
}
// Returns a write reference to the block at |location|, treated as the
// superblock.
//
// A cached block is re-tagged as BT_SUPERBLOCK and given validator |v|; an
// uncached block is read from disk as a superblock, validated with |v| and
// cached.  Throws std::runtime_error if |location| is out of range.
template <uint32_t BlockSize>
typename block_manager<BlockSize>::write_ref
block_manager<BlockSize>::superblock(block_address location,
				     typename block_manager<BlockSize>::validator::ptr v)
{
	check(location);

	boost::optional<block_ptr> hit = cache_.get(location);
	if (!hit) {
		block_ptr fresh(new block(io_, location, BT_SUPERBLOCK, v));
		cache_.insert(fresh);
		return write_ref(*this, fresh);
	}

	block_ptr const &existing = *hit;
	existing->check_write_lockable();
	existing->bt_ = BT_SUPERBLOCK;
	existing->validator_ = v;
	return write_ref(*this, existing);
}
// Returns a write reference to the block at |location|, treated as the
// superblock, with its data zeroed.
//
// A cached block is cleared in place, tagged BT_SUPERBLOCK (matching what
// superblock() does, so that releasing it triggers the superblock flush
// ordering) and given validator |v|.  An uncached block is created
// zero-filled with |v|; the block constructor does not call the validator
// when zeroing, so |v| can be passed directly.
//
// Throws std::runtime_error if |location| is out of range.
template <uint32_t BlockSize>
typename block_manager<BlockSize>::write_ref
block_manager<BlockSize>::superblock_zero(block_address location,
					  typename block_manager<BlockSize>::validator::ptr v)
{
	check(location);

	boost::optional<block_ptr> cached_block = cache_.get(location);
	if (cached_block) {
		(*cached_block)->check_write_lockable();
		memset((*cached_block)->data_->raw(), 0, BlockSize); // FIXME: add a zero method to buffer
		(*cached_block)->bt_ = BT_SUPERBLOCK;
		(*cached_block)->validator_ = v;
		return write_ref(*this, *cached_block);
	}

	block_ptr b(new block(io_, location, BT_SUPERBLOCK, v, true));
	cache_.insert(b);
	return write_ref(*this, b);
}
// Range check for a block index: anything at or beyond the block count
// reported by io_ is rejected with std::runtime_error.
template <uint32_t BlockSize>
void
block_manager<BlockSize>::check(block_address b) const
{
	block_address const limit = io_->get_nr_blocks();
	if (b < limit)
		return;

	throw std::runtime_error("block address out of bounds");
}
// Forwards to the underlying block_io to report how many blocks it covers.
template <uint32_t BlockSize>
block_address
block_manager<BlockSize>::get_nr_blocks() const
{
	block_address const total = io_->get_nr_blocks();
	return total;
}
// Per-block callback used by flush(): asks the block to write itself out.
template <uint32_t BlockSize>
void
block_manager<BlockSize>::write_block(block_ptr b) const
{
	block &target = *b;
	target.flush();
}
// Visits every block the cache reports as unheld and flushes it via
// write_block().
template <uint32_t BlockSize>
void
block_manager<BlockSize>::flush() const
{
	typedef block_manager<BlockSize> self_type;

	cache_.iterate_unheld(
		boost::bind(&self_type::write_block, this, _1));
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,59 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef BLOCK_COUNTER_H
#define BLOCK_COUNTER_H
#include "block.h"
//----------------------------------------------------------------
namespace persistent_data {
//----------------------------------------------------------------
// Little helper class that keeps track of how many times blocks
// are referenced.
//----------------------------------------------------------------
class block_counter {
public:
typedef std::map<block_address, unsigned> count_map;
void inc(block_address b) {
count_map::iterator it = counts_.find(b);
if (it == counts_.end())
counts_.insert(make_pair(b, 1));
else
it->second++;
}
unsigned get_count(block_address b) const {
count_map::const_iterator it = counts_.find(b);
return (it == counts_.end()) ? 0 : it->second;
}
count_map const &get_counts() const {
return counts_;
}
private:
count_map counts_;
};
}
//----------------------------------------------------------------
#endif

371
persistent-data/btree.h Normal file
View File

@@ -0,0 +1,371 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef BTREE_H
#define BTREE_H
#include "endian_utils.h"
#include "transaction_manager.h"
#include <boost/noncopyable.hpp>
#include <boost/optional.hpp>
#include <list>
//----------------------------------------------------------------
namespace persistent_data {
// Reference counter policy that ignores every request.  Intended for value
// types that need no reference counting: both inc() and dec() do nothing.
template <typename ValueType>
class NoOpRefCounter {
public:
	void inc(ValueType const &) {
	}

	void dec(ValueType const &) {
	}
};
struct uint64_traits {
typedef base::__le64 disk_type;
typedef uint64_t value_type;
typedef NoOpRefCounter<uint64_t> ref_counter;
static void unpack(disk_type const &disk, value_type &value) {
value = base::to_cpu<uint64_t>(disk);
}
static void pack(value_type const &value, disk_type &disk) {
disk = base::to_disk<base::__le64>(value);
}
};
namespace btree_detail {
using namespace base;
using namespace std;
using namespace boost;
// Value passed to the crc32c constructor by the btree node validator when it
// computes a node's checksum.
uint32_t const BTREE_CSUM_XOR = 121107;
//------------------------------------------------
// On disk data layout for btree nodes
// Bit values stored in node_header::flags.  node_ref::get_type() treats a
// node with both bits set, or with neither set, as an error.
enum node_flags {
INTERNAL_NODE = 1,
LEAF_NODE = 1 << 1
};
// Fixed header at the start of every btree node block.  All fields are
// little endian and the struct is packed, so it maps directly onto the
// on-disk bytes.
struct node_header {
// Checksum; the node validator computes it over the block contents that
// follow this field.
__le32 csum;
// Combination of node_flags bits.
__le32 flags;
__le64 blocknr; /* which block this node is supposed to live in */
// Number of key/value entries currently in the node.
__le32 nr_entries;
// Capacity of the node in entries.
__le32 max_entries;
// Size of one value, as recorded by node_ref::set_value_size().
__le32 value_size;
// Not referenced by the code visible here.
__le32 padding;
} __attribute__((packed));
// Raw view of a whole btree node: the header followed immediately by the key
// array.  keys is a zero-length array used to address the data that follows
// the header in the block; node_ref indexes it directly.
struct disk_node {
struct node_header header;
__le64 keys[0];
} __attribute__((packed));
// In-memory classification of a node, as returned by node_ref::get_type()
// and accepted by node_ref::set_type().  Distinct from the on-disk
// node_flags bit values.
enum node_type {
INTERNAL,
LEAF
};
//------------------------------------------------
// Class that acts as an interface over the raw little endian btree
// node data.
//
// A node_ref does not own the node: it holds the block location and a
// pointer to the raw bytes, and converts between host and disk byte order
// on every access.  ValueTraits supplies the value's disk_type/value_type
// and the pack()/unpack() conversions.
template <typename ValueTraits>
class node_ref {
public:
// |b| is the location of the block, |raw| points at its data.
explicit node_ref(block_address b, disk_node *raw);
// Checksum stored in the header (not recomputed).
uint32_t get_checksum() const;
// Location passed to the constructor.
block_address get_location() const {
return location_;
}
// Block number recorded inside the node's header.
block_address get_block_nr() const;
// Throws std::runtime_error if the flags are not exactly one of
// INTERNAL_NODE / LEAF_NODE.
node_type get_type() const;
void set_type(node_type t);
unsigned get_nr_entries() const;
void set_nr_entries(unsigned n);
unsigned get_max_entries() const;
void set_max_entries(unsigned n);
// FIXME: remove this, and get the constructor to do it.
void set_max_entries(); // calculates the max for you.
size_t get_value_size() const;
void set_value_size(size_t);
// Throws std::runtime_error if i >= get_nr_entries().
uint64_t key_at(unsigned i) const;
// No bounds check is performed.
void set_key(unsigned i, uint64_t k);
// Throws std::runtime_error if i >= get_nr_entries().
typename ValueTraits::value_type value_at(unsigned i) const;
// No bounds check is performed.
void set_value(unsigned i,
typename ValueTraits::value_type const &v);
// Increments the nr_entries field
void insert_at(unsigned i,
uint64_t key,
typename ValueTraits::value_type const &v);
// Does not increment nr_entries
void overwrite_at(unsigned i,
uint64_t key,
typename ValueTraits::value_type const &v);
// Copies entries from another node, appends them
// to the back of this node. Adjusts nr_entries.
void copy_entries(node_ref const &rhs,
unsigned begin,
unsigned end);
// Various searches
// bsearch() returns the index of |key| if present; otherwise the index
// just above (want_hi != 0) or just below (want_hi == 0) where it would
// be, which can be nr_entries or -1 respectively.
int bsearch(uint64_t key, int want_hi) const;
// Index of |key|, or an empty optional if it is not in the node.
optional<unsigned> exact_search(uint64_t key) const;
// Same as bsearch(key, 0).
int lower_bound(uint64_t key) const;
template <typename RefCounter>
void inc_children(RefCounter &rc);
disk_node *raw() {
return raw_;
}
disk_node const *raw() const {
return raw_;
}
private:
static unsigned calc_max_entries(void);
void *key_ptr(unsigned i) const;
void *value_ptr(unsigned i) const;
block_address location_;
disk_node *raw_;
};
//------------------------------------------------
//
// Views the block behind a read reference as a btree node.
//
// The read reference only exposes const data, so constness is cast away to
// build the node_ref.
template <typename ValueTraits>
node_ref<ValueTraits>
to_node(typename block_manager<>::read_ref &b)
{
	// FIXME: this should return a const read_ref somehow.
	unsigned char const *ro_bytes = b.data().raw();
	unsigned char *bytes = const_cast<unsigned char *>(ro_bytes);

	return node_ref<ValueTraits>(b.get_location(),
				     reinterpret_cast<disk_node *>(bytes));
}
// Views the block behind a write reference as a btree node.  The write
// reference already exposes mutable data, so no constness needs removing.
template <typename ValueTraits>
node_ref<ValueTraits>
to_node(typename block_manager<>::write_ref &b)
{
	unsigned char *bytes = b.data().raw();

	return node_ref<ValueTraits>(b.get_location(),
				     reinterpret_cast<disk_node *>(bytes));
}
// Keeps read references to the nodes most recently visited while walking down
// a btree.  step() descends to a new block; get_node() exposes the current
// (most recently stepped-to) node.
class ro_spine : private noncopyable {
public:
	ro_spine(transaction_manager::ptr tm)
		: tm_(tm)
	{
	}

	// Read-locks block |b| and makes it the current node.
	void step(block_address b);

	// Returns the current node.  step() must have been called first.
	template <typename ValueTraits>
	node_ref<ValueTraits> get_node() {
		block_manager<>::read_ref &current = spine_.back();
		return to_node<ValueTraits>(current);
	}

private:
	transaction_manager::ptr tm_;
	std::list<block_manager<>::read_ref> spine_;
};
// Keeps write references to the current node and its parent while walking
// down a btree for modification.  At most two references are retained: the
// front of the list is the parent, the back is the current node.
class shadow_spine : private noncopyable {
public:
	typedef transaction_manager::read_ref read_ref;
	typedef transaction_manager::write_ref write_ref;

	// root_ is given a defined value so that get_root() is well defined
	// even before the first step(); it is overwritten by the first step.
	shadow_spine(transaction_manager::ptr tm)
		: tm_(tm),
		  root_(0) {
	}

	// true if the children of the shadow need incrementing
	bool step(block_address b);

	// Makes |b| the current node.  The first node stepped to is recorded
	// as the root; once more than two nodes are held the oldest is dropped.
	void step(transaction_manager::write_ref b) {
		spine_.push_back(b);
		if (spine_.size() == 1)
			root_ = spine_.front().get_location();
		else if (spine_.size() > 2)
			spine_.pop_front();
	}

	// Discards the current node.
	void pop() {
		spine_.pop_back();
	}

	// Returns the current node.  The spine must not be empty.
	template <typename ValueTraits>
	node_ref<ValueTraits> get_node() {
		return to_node<ValueTraits>(spine_.back());
	}

	// Location of the current node.  The spine must not be empty.
	block_address get_block() const {
		return spine_.back().get_location();
	}

	bool has_parent() const {
		return spine_.size() > 1;
	}

	// Returns the parent of the current node.
	// Throws std::runtime_error if there is no parent.
	node_ref<uint64_traits> get_parent() {
		if (spine_.size() < 2)
			throw std::runtime_error("no parent");

		return to_node<uint64_traits>(spine_.front());
	}

	// Location of the front entry of the spine; callers should check
	// has_parent() first, as no check is made here.
	block_address get_parent_location() const {
		return spine_.front().get_location();
	}

	// Location of the first node stepped to.
	block_address get_root() const {
		return root_;
	}

private:
	transaction_manager::ptr tm_;
	std::list<block_manager<>::write_ref> spine_;
	block_address root_;
};
}
// A btree stored in blocks managed by a transaction_manager.
//
// Levels is the number of uint64_t components in a key (see the key
// typedef); ValueTraits describes the leaf value type, its on-disk form and
// its reference counter.
template <unsigned Levels, typename ValueTraits>
class btree {
public:
	typedef boost::shared_ptr<btree<Levels, ValueTraits> > ptr;

	typedef uint64_t key[Levels];
	typedef typename ValueTraits::value_type value_type;
	typedef boost::optional<value_type> maybe_value;
	typedef boost::optional<std::pair<unsigned, value_type> > maybe_pair;
	typedef typename block_manager<>::read_ref read_ref;
	typedef typename block_manager<>::write_ref write_ref;
	typedef typename btree_detail::node_ref<ValueTraits> leaf_node;
	typedef typename btree_detail::node_ref<uint64_traits> internal_node;

	btree(typename persistent_data::transaction_manager::ptr tm,
	      typename ValueTraits::ref_counter rc);

	// Variant taking the block address of an existing root node.
	btree(typename transaction_manager::ptr tm,
	      block_address root,
	      typename ValueTraits::ref_counter rc);

	~btree();

	maybe_value lookup(key const &key) const;
	maybe_pair lookup_le(key const &key) const;
	maybe_pair lookup_ge(key const &key) const;

	void insert(key const &key, typename ValueTraits::value_type const &value);
	void remove(key const &key);

	void set_root(block_address root);
	block_address get_root() const;

	ptr clone() const;

	// free the on disk btree when the destructor is called
	void destroy();

	// Derive a class from this base class if you need to
	// inspect the individual nodes that make up a btree.
	class visitor {
	public:
		virtual ~visitor() {}
		typedef boost::shared_ptr<visitor> ptr;

		// The bool return values indicate whether the walk
		// should be continued into sub trees of the node (true == continue).
		virtual bool visit_internal(unsigned level, bool sub_root, boost::optional<uint64_t> key,
					    internal_node const &n) = 0;
		virtual bool visit_internal_leaf(unsigned level, bool sub_root, boost::optional<uint64_t> key,
						 internal_node const &n) = 0;
		virtual bool visit_leaf(unsigned level, bool sub_root, boost::optional<uint64_t> key,
					leaf_node const &n) = 0;

		virtual void visit_complete() {}
	};

	// Walks the tree in depth first order
	void visit(typename visitor::ptr visitor) const;

private:
	// optional is spelled boost::optional here so that the declaration
	// does not depend on a using-directive being in effect at this point.
	template <typename ValueTraits2, typename Search>
	boost::optional<typename ValueTraits2::value_type>
	lookup_raw(btree_detail::ro_spine &spine, block_address block, uint64_t key) const;

	template <typename ValueTraits2>
	void split_node(btree_detail::shadow_spine &spine,
			block_address parent_index,
			uint64_t key,
			bool top);

	template <typename ValueTraits2>
	void split_beneath(btree_detail::shadow_spine &spine, uint64_t key);

	template <typename ValueTraits2>
	void split_sibling(btree_detail::shadow_spine &spine,
			   block_address parent_index,
			   uint64_t key);

	template <typename ValueTraits2>
	bool
	insert_location(btree_detail::shadow_spine &spine,
			block_address block,
			uint64_t key,
			int *index);

	void walk_tree(typename visitor::ptr visitor,
		       unsigned level, bool root, boost::optional<uint64_t> key,
		       block_address b) const;

	typename persistent_data::transaction_manager::ptr tm_;
	bool destroy_;
	block_address root_;
	NoOpRefCounter<uint64_t> internal_rc_;
	typename ValueTraits::ref_counter rc_;
};
};
#include "btree.tcc"
//----------------------------------------------------------------
#endif

782
persistent-data/btree.tcc Normal file
View File

@@ -0,0 +1,782 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "btree.h"
#include "errors.h"
#include "checksum.h"
#include "transaction_manager.h"
#include <iostream>
using namespace base;
using namespace btree_detail;
using namespace persistent_data;
using namespace std;
//----------------------------------------------------------------
namespace {
// Validator for btree node blocks.
//
// check() verifies the checksum stored in the node header and that the
// block number recorded in the header matches where the block was read from.
// prepare() stamps the block number and a fresh checksum into the header
// before the block is written.
//
// The node is located through buffer::raw(), as to_node() does, rather than
// by reinterpreting the address of the buffer object itself.
struct btree_node_validator : public block_manager<>::validator {
	// Throws checksum_error if the checksum or the block number is wrong.
	virtual void check(buffer<> const &b, block_address location) const {
		disk_node const *data = reinterpret_cast<disk_node const *>(b.raw());
		node_header const *n = &data->header;

		// The checksum covers the block from the flags field onwards,
		// i.e. everything after the csum field itself.
		crc32c sum(BTREE_CSUM_XOR);
		sum.append(&n->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
		if (sum.get_sum() != to_cpu<uint32_t>(n->csum))
			throw checksum_error("bad checksum in btree node");

		if (to_cpu<uint64_t>(n->blocknr) != location)
			throw checksum_error("bad block nr in btree node");
	}

	virtual void prepare(buffer<> &b, block_address location) const {
		disk_node *data = reinterpret_cast<disk_node *>(b.raw());
		node_header *n = &data->header;

		// blocknr lies inside the checksummed region, so it must be
		// set before the checksum is computed.
		n->blocknr = to_disk<base::__le64, uint64_t>(location);

		crc32c sum(BTREE_CSUM_XOR);
		sum.append(&n->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
		n->csum = to_disk<base::__le32>(sum.get_sum());
	}
};
// Creates a new btree_node_validator wrapped in the shared pointer type the
// block manager expects.
block_manager<>::validator::ptr
btree_validator()
{
	typedef block_manager<>::validator::ptr validator_ptr;

	validator_ptr v(new btree_node_validator);
	return v;
}
}
//----------------------------------------------------------------
// Read-locks block |b| with the btree validator and makes it the current
// node.  Only the two most recent nodes are kept.
inline void
ro_spine::step(block_address b)
{
	spine_.push_back(tm_->read_lock(b, btree_validator()));

	if (spine_.size() <= 2)
		return;

	spine_.pop_front();
}
// Shadows block |b| through the transaction manager and makes the shadow the
// current node.  If adding it to the spine throws, the space map reference
// on the shadow is dropped before the exception is passed on.
//
// Returns the flag reported by shadow(): true if the children of the shadow
// need incrementing.
inline bool
shadow_spine::step(block_address b)
{
	std::pair<write_ref, bool> shadowed = tm_->shadow(b, btree_validator());

	try {
		step(shadowed.first);

	} catch (...) {
		tm_->get_sm()->dec(shadowed.first.get_location());
		throw;
	}

	return shadowed.second;
}
//----------------------------------------------------------------
// Records where the node lives and where its raw bytes are; nothing is read
// or validated here.
template <typename ValueTraits>
node_ref<ValueTraits>::node_ref(block_address location, disk_node *raw)
	: location_(location), raw_(raw)
{
}
// Returns the checksum stored in the node header, in host byte order.
template <typename ValueTraits>
uint32_t
node_ref<ValueTraits>::get_checksum() const
{
	uint32_t const stored = to_cpu<uint32_t>(raw_->header.csum);
	return stored;
}
// Returns the block number recorded inside the node header, in host byte
// order.
template <typename ValueTraits>
block_address
node_ref<ValueTraits>::get_block_nr() const
{
	uint64_t const stored = to_cpu<uint64_t>(raw_->header.blocknr);
	return stored;
}
// Decodes the header flags into a node_type.
// Throws runtime_error if both the internal and leaf bits are set, or if
// neither is.
template <typename ValueTraits>
btree_detail::node_type
node_ref<ValueTraits>::get_type() const
{
	uint32_t const flags = to_cpu<uint32_t>(raw_->header.flags);
	bool const internal = (flags & INTERNAL_NODE) != 0;
	bool const leaf = (flags & LEAF_NODE) != 0;

	if (internal && leaf)
		throw runtime_error("btree node is both internal and leaf");

	if (internal)
		return INTERNAL;

	if (leaf)
		return LEAF;

	throw runtime_error("unknown node type");
}
// Replaces the header flags with the single bit matching |t|.  A value of
// |t| that is neither INTERNAL nor LEAF leaves the stored flags as they were.
template <typename ValueTraits>
void
node_ref<ValueTraits>::set_type(node_type t)
{
	uint32_t flags = to_cpu<uint32_t>(raw_->header.flags);

	if (t == INTERNAL)
		flags = INTERNAL_NODE;
	else if (t == LEAF)
		flags = LEAF_NODE;

	raw_->header.flags = to_disk<__le32>(flags);
}
// Returns how many entries the header says the node currently holds.
template <typename ValueTraits>
unsigned
node_ref<ValueTraits>::get_nr_entries() const
{
	uint32_t const stored = to_cpu<uint32_t>(raw_->header.nr_entries);
	return stored;
}
// Stores |n| as the node's entry count.  The entries themselves are not
// touched.
template <typename ValueTraits>
void
node_ref<ValueTraits>::set_nr_entries(unsigned n)
{
	node_header &hdr = raw_->header;
	hdr.nr_entries = to_disk<__le32>(n);
}
// Returns the capacity recorded in the node header.
template <typename ValueTraits>
unsigned
node_ref<ValueTraits>::get_max_entries() const
{
	uint32_t const stored = to_cpu<uint32_t>(raw_->header.max_entries);
	return stored;
}
// Stores |n| as the node's capacity.
template <typename ValueTraits>
void
node_ref<ValueTraits>::set_max_entries(unsigned n)
{
	node_header &hdr = raw_->header;
	hdr.max_entries = to_disk<__le32>(n);
}
// Sets the node's capacity to the value computed by calc_max_entries().
template <typename ValueTraits>
void
node_ref<ValueTraits>::set_max_entries()
{
	unsigned const capacity = calc_max_entries();
	set_max_entries(capacity);
}
// Returns the value size recorded in the node header.
template <typename ValueTraits>
size_t
node_ref<ValueTraits>::get_value_size() const
{
	uint32_t const stored = to_cpu<uint32_t>(raw_->header.value_size);
	return stored;
}
// Stores |s| as the node's value size.  The header field is 32 bits wide, so
// |s| is narrowed to uint32_t.
template <typename ValueTraits>
void
node_ref<ValueTraits>::set_value_size(size_t s)
{
	uint32_t const narrowed = static_cast<uint32_t>(s);
	raw_->header.value_size = to_disk<__le32>(narrowed);
}
// Returns the key stored at index |i|, in host byte order.
// Throws runtime_error if |i| is not below the node's entry count.
template <typename ValueTraits>
uint64_t
node_ref<ValueTraits>::key_at(unsigned i) const
{
	if (i < get_nr_entries())
		return to_cpu<uint64_t>(raw_->keys[i]);

	throw runtime_error("key index out of bounds");
}
// Stores |k| at key index |i| in disk byte order.  The index is not checked.
template <typename ValueTraits>
void
node_ref<ValueTraits>::set_key(unsigned i, uint64_t k)
{
	__le64 const encoded = to_disk<__le64>(k);
	raw_->keys[i] = encoded;
}
// Returns the value stored at index |i|, unpacked through ValueTraits.
// Throws runtime_error if |i| is not below the node's entry count.
template <typename ValueTraits>
typename ValueTraits::value_type
node_ref<ValueTraits>::value_at(unsigned i) const
{
	typedef typename ValueTraits::disk_type disk_type;
	typedef typename ValueTraits::value_type value_type;

	if (!(i < get_nr_entries()))
		throw runtime_error("value index out of bounds");

	// The stored value may not be suitably aligned for disk_type, so it
	// is copied out bytewise before being unpacked.
	disk_type on_disk;
	::memcpy(&on_disk, value_ptr(i), sizeof(on_disk));

	value_type result;
	ValueTraits::unpack(on_disk, result);
	return result;
}
// Packs |v| through ValueTraits and copies it bytewise into value slot |i|.
// The index is not checked.
template <typename ValueTraits>
void
node_ref<ValueTraits>::set_value(unsigned i,
				 typename ValueTraits::value_type const &v)
{
	typedef typename ValueTraits::disk_type disk_type;

	disk_type on_disk;
	ValueTraits::pack(v, on_disk);
	::memcpy(value_ptr(i), &on_disk, sizeof(on_disk));
}
// Inserts (|key|, |v|) at slot |i|, shifting the entries currently at
// [i, nr_entries) up by one slot.
//
// Throws runtime_error if |i| is beyond the end of the node (i.e. greater
// than the current entry count) or if the node is already full.
template <typename ValueTraits>
void
node_ref<ValueTraits>::insert_at(unsigned i,
				 uint64_t key,
				 typename ValueTraits::value_type const &v)
{
	unsigned const n = get_nr_entries();

	// Without this check (n - i) below would wrap round and the
	// memmoves would trample far beyond the node.
	if (i > n)
		throw runtime_error("insert index out of bounds");

	if ((n + 1) > get_max_entries())
		throw runtime_error("too many entries");

	unsigned const nr_to_shift = n - i;

	set_nr_entries(n + 1);
	::memmove(key_ptr(i + 1), key_ptr(i), sizeof(uint64_t) * nr_to_shift);
	::memmove(value_ptr(i + 1), value_ptr(i),
		  sizeof(typename ValueTraits::disk_type) * nr_to_shift);
	overwrite_at(i, key, v);
}
// Replaces both the key and the value held in slot |i|.  The entry count
// is not changed.
template <typename ValueTraits>
void
node_ref<ValueTraits>::overwrite_at(unsigned i,
				    uint64_t key,
				    typename ValueTraits::value_type const &v)
{
	this->set_key(i, key);
	this->set_value(i, v);
}
// Appends the entries [|begin|, |end|) of |rhs| to the end of this node.
//
// Throws runtime_error if the range is reversed (|end| < |begin|) or if
// the copied entries would not fit within this node's max_entries.
template <typename ValueTraits>
void
node_ref<ValueTraits>::copy_entries(node_ref const &rhs,
				    unsigned begin,
				    unsigned end)
{
	// A reversed range would make (end - begin) wrap round; the
	// capacity check below can then be defeated by a second wrap in
	// (n + count), leading to an enormous memcpy.
	if (end < begin)
		throw runtime_error("bad entry range");

	unsigned const count = end - begin;
	unsigned const n = get_nr_entries();

	if ((n + count) > get_max_entries())
		throw runtime_error("too many entries");

	::memcpy(key_ptr(n), rhs.key_ptr(begin), sizeof(uint64_t) * count);
	::memcpy(value_ptr(n), rhs.value_ptr(begin),
		 sizeof(typename ValueTraits::disk_type) * count);
	set_nr_entries(n + count);
}
// Binary search over the node's keys.
//
// Returns the index of |key| if it is present.  Otherwise returns one of
// the two indexes bracketing where |key| would be: the upper one if
// |want_hi| is non-zero (may equal nr_entries), else the lower one (may
// be -1).
template <typename ValueTraits>
int
node_ref<ValueTraits>::bsearch(uint64_t key, int want_hi) const
{
	int below = -1;
	int above = get_nr_entries();

	while (above - below > 1) {
		int const probe = below + ((above - below) / 2);
		uint64_t const probe_key = key_at(probe);

		if (probe_key == key)
			return probe;

		if (probe_key < key)
			below = probe;
		else
			above = probe;
	}

	if (want_hi)
		return above;

	return below;
}
// Returns the index of the entry whose key equals |key|, or an empty
// optional if there is no such entry.
template <typename ValueTraits>
optional<unsigned>
node_ref<ValueTraits>::exact_search(uint64_t key) const
{
	int const idx = bsearch(key, 0);
	bool const in_range = (idx >= 0) &&
		(static_cast<unsigned>(idx) < get_nr_entries());

	if (in_range && key_at(idx) == key)
		return optional<unsigned>(idx);

	return optional<unsigned>();
}
// Returns the index of |key| if present, otherwise the index of the
// entry just below where it would be.  The result is -1 when there is
// no such entry (see bsearch() with want_hi == 0).
template <typename ValueTraits>
int
node_ref<ValueTraits>::lower_bound(uint64_t key) const
{
	int const want_hi = 0;
	return bsearch(key, want_hi);
}
// Works out how many (key, value) pairs fit in a metadata block after
// the node header, rounded down to a multiple of 3.
template <typename ValueTraits>
unsigned
node_ref<ValueTraits>::calc_max_entries(void)
{
	// each entry is a 64bit key plus a packed value
	size_t const entry_size = sizeof(uint64_t) + sizeof(typename ValueTraits::disk_type);
	uint32_t const fits = (MD_BLOCK_SIZE - sizeof(struct node_header)) / entry_size;

	return fits - (fits % 3); // rounds down
}
// Returns the address of key slot |i| within the node.  No bounds check.
template <typename ValueTraits>
void *
node_ref<ValueTraits>::key_ptr(unsigned i) const
{
	return &raw_->keys[i];
}
// Returns the address of value slot |i|.  The value array begins
// directly after key slot max_entries (as read from the header), and
// slots are sizeof(disk_type) bytes apart.  No bounds check.
template <typename ValueTraits>
void *
node_ref<ValueTraits>::value_ptr(unsigned i) const
{
	uint32_t const capacity = to_cpu<uint32_t>(raw_->header.max_entries);
	void *values_begin = &raw_->keys[capacity];
	size_t const offset = sizeof(typename ValueTraits::disk_type) * i;

	return static_cast<unsigned char *>(values_begin) + offset;
}
// Calls rc.inc() once for every value currently held in the node, in
// slot order.
template <typename ValueTraits>
template <typename RefCounter>
void
node_ref<ValueTraits>::inc_children(RefCounter &rc)
{
	unsigned const count = get_nr_entries();

	for (unsigned slot = 0; slot < count; ++slot) {
		// value_at() does the memcpy + unpack for us
		typename ValueTraits::value_type v = value_at(slot);
		rc.inc(v);
	}
}
//----------------------------------------------------------------
// Creates a brand new tree: a fresh block is obtained from the
// transaction manager and set up as an empty leaf node, which becomes
// the root.
template <unsigned Levels, typename ValueTraits>
btree<Levels, ValueTraits>::
btree(typename transaction_manager::ptr tm,
      typename ValueTraits::ref_counter rc)
	: tm_(tm), destroy_(false), rc_(rc)
{
	using namespace btree_detail;

	write_ref root_block = tm_->new_block(btree_validator());

	leaf_node empty_leaf = to_node<ValueTraits>(root_block);
	empty_leaf.set_type(btree_detail::LEAF);
	empty_leaf.set_nr_entries(0);
	empty_leaf.set_max_entries();
	empty_leaf.set_value_size(sizeof(typename ValueTraits::disk_type));

	root_ = root_block.get_location();
}
// Wraps a tree whose root node already lives at block |root|.  Nothing
// is read or written here.
template <unsigned Levels, typename ValueTraits>
btree<Levels, ValueTraits>::
btree(typename transaction_manager::ptr tm,
      block_address root,
      typename ValueTraits::ref_counter rc)
	: tm_(tm), destroy_(false), root_(root), rc_(rc)
{
}
// Nothing to release explicitly; the members clean up after themselves.
template <unsigned Levels, typename ValueTraits>
btree<Levels, ValueTraits>::~btree()
{
	// intentionally empty
}
namespace {
	// Search policy: finds the entry for |key|, or failing that the
	// entry just below it.  Returns an empty optional if the node has
	// no such entry (node_ref::lower_bound() reports this as -1, which
	// must not be allowed to convert into a huge unsigned index).
	template <typename ValueTraits>
	struct lower_bound_search {
		static optional<unsigned> search(btree_detail::node_ref<ValueTraits> n, uint64_t key) {
			int const i = n.lower_bound(key);
			if (i < 0)
				return optional<unsigned>();

			return optional<unsigned>(static_cast<unsigned>(i));
		}
	};

	// Search policy: finds the entry whose key is exactly |key|.
	// Returns an empty optional if there isn't one.
	template <typename ValueTraits>
	struct exact_search {
		static optional<unsigned> search(btree_detail::node_ref<ValueTraits> n, uint64_t key) {
			return n.exact_search(key);
		}
	};
}
// Looks up the value stored under the multi-level |key|.
//
// Each of the first Levels - 1 key components selects (via a lower bound
// search) the root of the tree for the next level; the final component
// must match exactly.  Returns an empty maybe_value if any step fails to
// find an entry.
template <unsigned Levels, typename ValueTraits>
typename btree<Levels, ValueTraits>::maybe_value
btree<Levels, ValueTraits>::lookup(key const &key) const
{
	using namespace btree_detail;

	unsigned const last_level = Levels - 1;
	ro_spine spine(tm_);
	block_address subtree = root_;

	for (unsigned level = 0; level < last_level; ++level) {
		optional<block_address> next =
			lookup_raw<uint64_traits, lower_bound_search<uint64_traits> >(spine, subtree, key[level]);

		if (!next)
			return maybe_value();

		subtree = *next;
	}

	return lookup_raw<ValueTraits, exact_search<ValueTraits> >(spine, subtree, key[last_level]);
}
// Not implemented yet: |key| is ignored and an empty maybe_pair is
// always returned.
template <unsigned Levels, typename ValueTraits>
typename btree<Levels, ValueTraits>::maybe_pair
btree<Levels, ValueTraits>::lookup_le(key const &key) const
{
	maybe_pair const nothing;
	return nothing;
}
// Not implemented yet: |key| is ignored and an empty maybe_pair is
// always returned.
template <unsigned Levels, typename ValueTraits>
typename btree<Levels, ValueTraits>::maybe_pair
btree<Levels, ValueTraits>::lookup_ge(key const &key) const
{
	maybe_pair const nothing;
	return nothing;
}
// Inserts |value| under the multi-level |key|, overwriting any value
// already stored there.
//
// For each of the first Levels - 1 key components the matching entry in
// that level's tree is located (or created, pointing at a freshly made
// empty btree<Levels - 1>), and its value is used as the root of the
// next level.  The final component is then inserted into / overwritten
// in the bottom level tree.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::
insert(key const &key,
typename ValueTraits::value_type const &value)
{
using namespace btree_detail;
block_address block = root_;
int index = 0; // FIXME: ???
shadow_spine spine(tm_);
for (unsigned level = 0; level < Levels - 1; ++level) {
// Leaves the spine on the leaf of this level's tree and |index|
// at the slot for key[level].
bool need_insert = insert_location<uint64_traits>(spine, block, key[level], &index);
internal_node n = spine.template get_node<uint64_traits>();
if (need_insert) {
// No subtree for this key component yet, so create an empty one.
btree<Levels - 1, ValueTraits> new_tree(tm_, rc_);
n.insert_at(index, key[level], new_tree.get_root());
}
// Descend into the subtree for the next level.
block = n.value_at(index);
}
bool need_insert = insert_location<ValueTraits>(spine, block, key[Levels - 1], &index);
leaf_node n = spine.template get_node<ValueTraits>();
if (need_insert)
n.insert_at(index, key[Levels - 1], value);
else
// FIXME: check if we're overwriting with the same value.
n.set_value(index, value);
}
// Not implemented yet: the tree is left untouched and |key| is ignored.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::remove(key const &key)
{
	// intentionally empty
}
// Returns the block address currently recorded as the root of the tree.
template <unsigned Levels, typename ValueTraits>
block_address
btree<Levels, ValueTraits>::get_root() const
{
	return this->root_;
}
// Records |root| as the root block of the tree.  Only the member is
// updated; no blocks are touched.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::set_root(block_address root)
{
	this->root_ = root;
}
// Returns a new btree object sharing this tree's root block.  The
// root's count is bumped in the transaction manager's space map first.
template <unsigned Levels, typename ValueTraits>
typename btree<Levels, ValueTraits>::ptr
btree<Levels, ValueTraits>::clone() const
{
	tm_->get_sm()->inc(root_);

	ptr copy(new btree<Levels, ValueTraits>(tm_, root_, rc_));
	return copy;
}
// destroy() is compiled out: the definition below is an empty
// placeholder kept until the operation is actually implemented.
#if 0
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::destroy()
{
using namespace btree_detail;
}
#endif
// Walks a single level tree from |block| down to a leaf, then uses the
// |Search| policy to pick the entry for |key| in that leaf.
//
// Internal nodes are always navigated with a lower bound search.
// Returns the value found, or an empty optional if any node on the way
// down (or the leaf itself) has no suitable entry.
template <unsigned Levels, typename _>
template <typename ValueTraits, typename Search>
optional<typename ValueTraits::value_type>
btree<Levels, _>::
lookup_raw(ro_spine &spine, block_address block, uint64_t key) const
{
	using namespace boost;
	typedef typename ValueTraits::value_type leaf_type;

	for (;;) {
		spine.step(block);
		node_ref<ValueTraits> leaf = spine.template get_node<ValueTraits>();

		if (leaf.get_type() == btree_detail::LEAF) {
			optional<unsigned> mi = Search::search(leaf, key);

			// An index beyond the end of the node also means
			// "not found"; value_at() would throw on it.
			if (!mi || *mi >= leaf.get_nr_entries())
				return optional<leaf_type>();

			return optional<leaf_type>(leaf.value_at(*mi));
		}

		// lower_bound() signals "key is below every entry" with -1,
		// so the test has to be made on the signed result before it
		// is used as an index.
		int const i = leaf.lower_bound(key);
		if (i < 0)
			return optional<leaf_type>();

		node_ref<uint64_traits> internal = spine.template get_node<uint64_traits>();
		block = internal.value_at(i);
	}
}
// Splits the node at the tip of |spine| if, and only if, it is full.
// A |top| node is split beneath itself; any other node is split into
// itself plus a new sibling.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
split_node(btree_detail::shadow_spine &spine,
	   block_address parent_index,
	   uint64_t key,
	   bool top)
{
	node_ref<ValueTraits> current = spine.template get_node<ValueTraits>();

	if (current.get_nr_entries() != current.get_max_entries())
		return;	// still room, nothing to do

	if (top) {
		split_beneath<ValueTraits>(spine, key);
		return;
	}

	split_sibling<ValueTraits>(spine, parent_index, key);
}
// Splits the node at the tip of |spine| by pushing its contents down
// into two newly allocated children.  The node itself stays at the same
// block and is rewritten as an internal node with exactly two entries
// (one per child).  Finally the spine steps into whichever child covers
// |key|.
//
// Throws std::runtime_error if the node's value_size does not match
// ValueTraits::disk_type.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
split_beneath(btree_detail::shadow_spine &spine,
uint64_t key)
{
using namespace btree_detail;
node_type type;
unsigned nr_left, nr_right;
// Set up two empty children; their type is filled in below once the
// parent's type is known.
write_ref left = tm_->new_block(btree_validator());
node_ref<ValueTraits> l = to_node<ValueTraits>(left);
l.set_nr_entries(0);
l.set_max_entries();
l.set_value_size(sizeof(typename ValueTraits::disk_type));
write_ref right = tm_->new_block(btree_validator());
node_ref<ValueTraits> r = to_node<ValueTraits>(right);
r.set_nr_entries(0);
r.set_max_entries();
r.set_value_size(sizeof(typename ValueTraits::disk_type));
{
node_ref<ValueTraits> p = spine.template get_node<ValueTraits>();
if (p.get_value_size() != sizeof(typename ValueTraits::disk_type))
throw std::runtime_error("bad value_size");
// The lower half of the entries goes left, the remainder right.
nr_left = p.get_nr_entries() / 2;
nr_right = p.get_nr_entries() - nr_left;
type = p.get_type();
l.set_type(type);
l.copy_entries(p, 0, nr_left);
r.set_type(type);
r.copy_entries(p, nr_left, nr_left + nr_right);
}
{
// The parent may have changed value type, so we re-get it.
internal_node p = spine.template get_node<uint64_traits>();
p.set_type(btree_detail::INTERNAL);
p.set_max_entries();
p.set_nr_entries(2);
p.set_value_size(sizeof(typename uint64_traits::disk_type));
// Each child is keyed by its own first key.
p.overwrite_at(0, l.key_at(0), left.get_location());
p.overwrite_at(1, r.key_at(0), right.get_location());
}
// Continue the descent in the child that covers |key|.
if (key < r.key_at(0))
spine.step(left);
else
spine.step(right);
}
// Splits the node at the tip of |spine| in two: the upper half of its
// entries is moved into a newly allocated right hand sibling, and the
// parent (the previous node on the spine) gets an extra entry for that
// sibling at |parent_index| + 1.  Afterwards the spine pops back to the
// parent and steps into whichever of the two nodes covers |key|.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
split_sibling(btree_detail::shadow_spine &spine,
block_address parent_index,
uint64_t key)
{
using namespace btree_detail;
node_ref<ValueTraits> l = spine.template get_node<ValueTraits>();
block_address left = spine.get_block();
write_ref right = tm_->new_block(btree_validator());
node_ref<ValueTraits> r = to_node<ValueTraits>(right);
unsigned nr_left = l.get_nr_entries() / 2;
unsigned nr_right = l.get_nr_entries() - nr_left;
r.set_nr_entries(0);
r.set_max_entries();
r.set_type(l.get_type());
r.set_value_size(sizeof(typename ValueTraits::disk_type));
// Copy the upper half across, then truncate the left node.  The copy
// must come first since it reads the entries being dropped.
r.copy_entries(l, nr_left, nr_left + nr_right);
l.set_nr_entries(nr_left);
internal_node p = spine.get_parent();
p.overwrite_at(parent_index, l.key_at(0), left);
p.insert_at(parent_index + 1, r.key_at(0), right.get_location());
// Drop the split node from the spine and re-enter via the parent.
spine.pop();
if (key < r.key_at(0))
spine.step(left);
else
spine.step(right);
}
// Walks a single level tree from |block| down to the leaf that should
// hold |key|, shadowing each node via |spine| and splitting any full
// node met on the way.
//
// On entry *index is the slot in the current spine parent (if there is
// one) that refers to |block|; on exit it is the slot in the leaf where
// |key| lives or should be inserted.
//
// Returns true if we need a new insertion, rather than overwrite.
template <unsigned Levels, typename _>
template <typename ValueTraits>
bool
btree<Levels, _>::
insert_location(btree_detail::shadow_spine &spine,
block_address block,
uint64_t key,
int *index)
{
using namespace btree_detail;
bool top = true; // this isn't the same as spine.has_parent()
int i = *index;
bool inc = false;
for (;;) {
inc = spine.step(block);
#if 0
if (inc)
inc_children<ValueTraits>();
#endif
// patch up the parent to point to the new shadow
if (spine.has_parent()) {
internal_node p = spine.get_parent();
p.set_value(i, spine.get_block());
}
internal_node internal = spine.template get_node<uint64_traits>();
// Split the node if we're full
if (internal.get_type() == INTERNAL)
split_node<uint64_traits>(spine, i, key, top);
else
split_node<ValueTraits>(spine, i, key, top);
// Splitting may have moved the spine onto a different node, so
// re-get it before searching.
internal = spine.template get_node<uint64_traits>();
i = internal.lower_bound(key);
if (internal.get_type() == btree_detail::LEAF)
break;
// |key| is below every key in this internal node: lower the first
// key so the leftmost child covers it.
if (i < 0) {
internal.set_key(0, key);
i = 0;
}
block = internal.value_at(i);
top = false;
}
node_ref<ValueTraits> leaf = spine.template get_node<ValueTraits>();
// FIXME: gross
// lower_bound() gave us the entry at or below |key|; unless it is an
// exact match the new entry belongs in the following slot.
if (i < 0 || leaf.key_at(i) != key)
i++;
// do decrement the old value if it already exists
// FIXME: I'm not sure about this, I don't understand the |inc| reference
if (static_cast<unsigned>(i) < leaf.get_nr_entries() && leaf.key_at(i) == key && inc) {
// dec old entry
}
*index = i;
// An insert is needed unless slot |i| already holds |key|.
return ((static_cast<unsigned>(i) >= leaf.get_nr_entries()) ||
(leaf.key_at(i) != key));
}
// Walks the whole tree starting from the root, showing every node to
// |visitor|, then tells the visitor the walk has finished.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::visit(typename visitor::ptr visitor) const
{
	// the root has no parent key and is the root of the level 0 tree
	boost::optional<uint64_t> const no_key;

	walk_tree(visitor, 0, true, no_key, root_);
	visitor->visit_complete();
}
// Recursively shows the node at block |b|, and everything below it, to
// |visitor|.
//
// - internal nodes are passed to visit_internal(); children stay on the
//   same |level|.
// - leaves above the bottom level are passed to visit_internal_leaf();
//   their values are roots of |level| + 1 trees.
// - bottom level leaves are passed to visit_leaf().
//
// Children are only walked if the visitor returned true for the node.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::
walk_tree(typename visitor::ptr visitor,
	  unsigned level, bool sub_root,
	  boost::optional<uint64_t> key,
	  block_address b) const
{
	using namespace btree_detail;

	read_ref blk = tm_->read_lock(b);
	internal_node o = to_node<uint64_traits>(blk);

	if (o.get_type() == INTERNAL) {
		if (!visitor->visit_internal(level, sub_root, key, o))
			return;

		for (unsigned i = 0; i < o.get_nr_entries(); i++)
			walk_tree(visitor, level, false, o.key_at(i), o.value_at(i));

		return;
	}

	if (level < Levels - 1) {
		if (!visitor->visit_internal_leaf(level, sub_root, key, o))
			return;

		for (unsigned i = 0; i < o.get_nr_entries(); i++)
			walk_tree(visitor, level + 1, true,
				  boost::optional<uint64_t>(o.key_at(i)), o.value_at(i));

		return;
	}

	leaf_node ov = to_node<ValueTraits>(blk);
	visitor->visit_leaf(level, sub_root, key, ov);
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,304 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef BTREE_CHECKER_H
#define BTREE_CHECKER_H
#include "block_counter.h"
#include "btree.h"
#include "checksum.h"
#include "error_set.h"
#include <sstream>
#include <map>
#include <set>
using namespace persistent_data;
using namespace std;
//----------------------------------------------------------------
namespace persistent_data {
//----------------------------------------------------------------
// This class implements consistency checking for the btrees in
// general. Derive from this if you want some additional checks.
// It's worth summarising what is checked:
//
// Implemented
// -----------
//
// - block_nr
// - nr_entries <= max_entries
// - max_entries fits in block
// - max_entries is divisible by 3
// - nr_entries >= minimum (except for root nodes)
//
// Not implemented
// ---------------
//
// - leaf | internal flags (this can be inferred from siblings)
//----------------------------------------------------------------
// A btree visitor that runs a set of generic consistency checks over
// every node it is shown.  Problems are not thrown; they are collected
// as children of an error_set which can be retrieved with get_errors().
//
// Every visited block is also counted in the block_counter passed to
// the constructor.  If |avoid_repeated_visits| is set, a block that has
// already been seen is counted again but not re-checked or descended
// into.
template <uint32_t Levels, typename ValueTraits>
class btree_checker : public btree<Levels, ValueTraits>::visitor {
public:
	btree_checker(block_counter &counter, bool avoid_repeated_visits = true)
		: counter_(counter),
		  errs_(new error_set("btree errors")),
		  avoid_repeated_visits_(avoid_repeated_visits) {
	}

	// Returns true if the walk should continue into the node's children.
	bool visit_internal(unsigned level,
			    bool sub_root,
			    optional<uint64_t> key,
			    btree_detail::node_ref<uint64_traits> const &n) {
		return check_internal(level, sub_root, key, n);
	}

	// A leaf of a non-bottom level; its values are roots of sub trees.
	bool visit_internal_leaf(unsigned level,
				 bool sub_root,
				 optional<uint64_t> key,
				 btree_detail::node_ref<uint64_traits> const &n) {
		return check_leaf(level, sub_root, key, n);
	}

	bool visit_leaf(unsigned level,
			bool sub_root,
			optional<uint64_t> key,
			btree_detail::node_ref<ValueTraits> const &n) {
		return check_leaf(level, sub_root, key, n);
	}

	// The errors collected so far.
	error_set::ptr get_errors() const {
		return errs_;
	}

protected:
	block_counter &get_counter() {
		return counter_;
	}

private:
	// Runs all the per-node checks on an internal node, stopping at
	// the first one that fails.  Returns true only if the node has not
	// been seen before and every check passed.
	bool check_internal(unsigned level,
			    bool sub_root,
			    optional<uint64_t> key,
			    btree_detail::node_ref<uint64_traits> const &n) {
		if (!already_visited(n) &&
		    check_sum(n) &&
		    check_block_nr(n) &&
		    check_max_entries(n) &&
		    check_nr_entries(n, sub_root) &&
		    check_ordered_keys(n) &&
		    check_parent_key(sub_root ? optional<uint64_t>() : key, n)) {
			if (sub_root)
				new_root(level);

			return true;
		}

		return false;
	}

	// As check_internal(), but additionally checks that the leaf's
	// keys follow on from the previous leaf seen at this level.
	template <typename ValueTraits2>
	bool check_leaf(unsigned level,
			bool sub_root,
			optional<uint64_t> key,
			btree_detail::node_ref<ValueTraits2> const &n) {
		if (!already_visited(n) &&
		    check_sum(n) &&
		    check_block_nr(n) &&
		    check_max_entries(n) &&
		    check_nr_entries(n, sub_root) &&
		    check_ordered_keys(n) &&
		    check_parent_key(sub_root ? optional<uint64_t>() : key, n)) {
			if (sub_root)
				new_root(level);

			return check_leaf_key(level, n);
		}

		return false;
	}

	// Counts the node's block.  Returns true if repeated visits are
	// being avoided and this block has been seen before.
	template <typename node>
	bool already_visited(node const &n) {
		block_address b = n.get_location();

		counter_.inc(b);

		if (avoid_repeated_visits_) {
			if (seen_.count(b) > 0)
				return true;

			seen_.insert(b);
		}

		return false;
	}

	// Recomputes the checksum over the block (everything from the
	// header's flags field onwards) and compares it with the stored one.
	template <typename node>
	bool check_sum(node const &n) const {
		crc32c sum(BTREE_CSUM_XOR);

		disk_node const *data = n.raw();
		sum.append(&data->header.flags, MD_BLOCK_SIZE - sizeof(uint32_t));
		if (sum.get_sum() != n.get_checksum()) {
			std::ostringstream out;
			out << "checksum error for block " << n.get_block_nr()
			    << ", sum was " << sum.get_sum()
			    << ", on disk " << n.get_checksum();
			errs_->add_child(out.str());
			return false;
		}

		return true;
	}

	// The block number recorded in the node must match where the node
	// was actually read from.
	template <typename node>
	bool check_block_nr(node const &n) const {
		if (n.get_location() != n.get_block_nr()) {
			std::ostringstream out;
			out << "block number mismatch: actually "
			    << n.get_location()
			    << ", claims " << n.get_block_nr();
			errs_->add_child(out.str());
			return false;
		}

		return true;
	}

	// max_entries keys and values, plus the header, must fit in a
	// block, and max_entries must be a multiple of 3.
	template <typename node>
	bool check_max_entries(node const &n) const {
		size_t elt_size = sizeof(uint64_t) + n.get_value_size();
		if (elt_size * n.get_max_entries() + sizeof(node_header) > MD_BLOCK_SIZE) {
			std::ostringstream out;
			out << "max entries too large: " << n.get_max_entries();
			errs_->add_child(out.str());
			return false;
		}

		if (n.get_max_entries() % 3) {
			std::ostringstream out;
			out << "max entries is not divisible by 3: " << n.get_max_entries();
			errs_->add_child(out.str());
			return false;
		}

		return true;
	}

	// nr_entries must not exceed max_entries and, except for root
	// nodes, must be at least a third of max_entries.
	template <typename node>
	bool check_nr_entries(node const &n, bool is_root) const {
		if (n.get_nr_entries() > n.get_max_entries()) {
			std::ostringstream out;
			// The failing condition is nr_entries > max_entries,
			// so that is the relation we report.
			out << "bad nr_entries: "
			    << n.get_nr_entries() << " > "
			    << n.get_max_entries();
			errs_->add_child(out.str());
			return false;
		}

		block_address min = n.get_max_entries() / 3;
		if (!is_root && (n.get_nr_entries() < min)) {
			ostringstream out;
			out << "too few entries in btree: "
			    << n.get_nr_entries()
			    << ", expected at least "
			    << min
			    << "(max_entries = " << n.get_max_entries() << ")";
			errs_->add_child(out.str());
			return false;
		}

		return true;
	}

	// Keys within a node must be strictly increasing.
	template <typename node>
	bool check_ordered_keys(node const &n) const {
		unsigned nr_entries = n.get_nr_entries();

		if (nr_entries == 0)
			return true; // can only happen if a root node

		uint64_t last_key = n.key_at(0);

		for (unsigned i = 1; i < nr_entries; i++) {
			uint64_t k = n.key_at(i);
			if (k <= last_key) {
				ostringstream out;
				out << "keys are out of order, " << k << " <= " << last_key;
				errs_->add_child(out.str());
				return false;
			}
			last_key = k;
		}

		return true;
	}

	// The key the parent holds for this node must not be greater than
	// the node's first key.  Nothing is checked if there is no parent
	// key.
	template <typename node>
	bool check_parent_key(boost::optional<uint64_t> key, node const &n) const {
		if (!key)
			return true;

		// An empty node has no first key to compare against, and
		// key_at(0) would throw.  The entry count itself is the
		// business of check_nr_entries().
		if (n.get_nr_entries() == 0)
			return true;

		if (*key > n.key_at(0)) {
			ostringstream out;
			out << "parent key mismatch: parent was " << *key
			    << ", but lowest in node was " << n.key_at(0);
			errs_->add_child(out.str());
			return false;
		}

		return true;
	}

	// The first key of a leaf must be greater than the last key of the
	// previous leaf seen at the same level (within the same sub tree).
	template <typename node>
	bool check_leaf_key(unsigned level, node const &n) {
		if (n.get_nr_entries() == 0)
			return true; // can only happen if a root node

		if (last_leaf_key_[level] && *last_leaf_key_[level] >= n.key_at(0)) {
			ostringstream out;
			out << "the last key of the previous leaf was " << *last_leaf_key_[level]
			    << " and the first key of this leaf is " << n.key_at(0);
			errs_->add_child(out.str());
			return false;
		}

		last_leaf_key_[level] = n.key_at(n.get_nr_entries() - 1);
		return true;
	}

	void new_root(unsigned level) {
		// we're starting a new subtree, so should
		// reset the last_leaf value.
		last_leaf_key_[level] = boost::optional<uint64_t>();
	}

	block_counter &counter_;
	std::set<block_address> seen_;
	error_set::ptr errs_;
	boost::optional<uint64_t> last_leaf_key_[Levels];
	bool avoid_repeated_visits_;
};
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,49 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "checksum.h"
#include <boost/crc.hpp>
using namespace base;
//----------------------------------------------------------------
// Starts with a zero sum; |xor_value| is applied to the sum whenever it
// is read back with get_sum().
crc32c::crc32c(uint32_t xor_value)
	: xor_value_(xor_value), sum_(0)
{
}
void
crc32c::append(void const *buffer, unsigned len)
{
uint32_t const powers = 0x1EDC6F41;
boost::crc_basic<32> crc(powers, 0xffffffff, 0, true, true);
crc.process_bytes(buffer, len);
sum_ = crc.checksum();
}
// Returns the stored sum combined (xor) with the value given to the
// constructor.
uint32_t
crc32c::get_sum() const
{
	uint32_t const result = xor_value_ ^ sum_;
	return result;
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,42 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef CHECKSUM_H
#define CHECKSUM_H
#include <stdint.h>
//----------------------------------------------------------------
namespace base {
// Small helper for calculating checksums.  Usage, as declared here:
// construct with the value to be xor-ed into the result, feed data in
// with append(), read the result back with get_sum().
class crc32c {
public:
// |xor_value| is stored and later returned xor-ed into the sum.
crc32c(uint32_t xor_value);
// Checksums |len| bytes starting at |buffer|.
void append(void const *buffer, unsigned len);
// The current sum, xor-ed with the constructor's |xor_value|.
uint32_t get_sum() const;
private:
uint32_t xor_value_;
uint32_t sum_;
};
}
//----------------------------------------------------------------
#endif

35
persistent-data/deleter.h Normal file
View File

@@ -0,0 +1,35 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef DELETER_H
#define DELETER_H
//----------------------------------------------------------------
namespace utils {
	// Function object that destroys the object it is handed using a
	// plain (non-array) delete.
	template <typename T>
	struct deleter {
		void operator()(T *doomed) {
			delete doomed;
		}
	};
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,64 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "endian_utils.h"
using namespace base;
//----------------------------------------------------------------
bool
base::test_bit_le(void const *bits, unsigned b)
{
__le64 const *w = reinterpret_cast<__le64 const *>(bits);
w += b / 64;
uint64_t v = to_cpu<uint64_t>(*w);
uint64_t mask = 1;
mask = mask << (b % 64);
return (v & mask) ? true : false;
}
void
base::set_bit_le(void *bits, unsigned b)
{
__le64 *w = reinterpret_cast<__le64 *>(bits);
w += b / 64;
uint64_t v = to_cpu<uint64_t>(*w);
uint64_t mask = 1;
mask = mask << (b % 64);
v |= mask;
*w = to_disk<__le64>(v);
}
void
base::clear_bit_le(void *bits, unsigned b)
{
__le64 *w = reinterpret_cast<__le64 *>(bits);
w += b / 64;
uint64_t v = to_cpu<uint64_t>(*w);
uint64_t mask = 1;
mask = mask << (b % 64);
mask = ~mask;
v &= mask;
*w = to_disk<__le64>(v);
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,110 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef ENDIAN_H
#define ENDIAN_H
#include <endian.h>
#include <stdint.h>
#include <boost/static_assert.hpp>
//----------------------------------------------------------------
// FIXME: rename to endian
namespace base {
// These are just little wrapper types to make the compiler
// understand that the le types are not assignable to the
// corresponding cpu type.
//
// Each holds the raw value in v_ and is packed.  The constructors
// are explicit and store their argument as given, without any byte
// swapping; use to_disk() / to_cpu() below to convert.
struct __le16 {
explicit __le16(uint16_t v = 0)
: v_(v) {
}
uint16_t v_;
} __attribute__((packed));
struct __le32 {
explicit __le32(uint32_t v = 0)
: v_(v) {
}
uint32_t v_;
} __attribute__((packed));
struct __le64 {
explicit __le64(uint64_t v = 0)
: v_(v) {
}
uint64_t v_;
} __attribute__((packed));
//--------------------------------
// FIXME: actually do the conversions !
// NOTE(review): the specialisations below do convert, via the
// le*toh() / htole*() functions from <endian.h>, so the FIXME above
// looks stale - confirm before removing it.
//
// The primary templates exist only to reject unsupported type
// pairs: the static assert can never hold, so instantiating one is
// a compile error.
template <typename CPUType, typename DiskType>
CPUType to_cpu(DiskType const &d) {
BOOST_STATIC_ASSERT(sizeof(d) == 0);
}
template <typename DiskType, typename CPUType>
DiskType to_disk(CPUType const &v) {
BOOST_STATIC_ASSERT(sizeof(v) == 0);
}
// 16 bit
template <>
inline uint16_t to_cpu<uint16_t, __le16>(__le16 const &d) {
return le16toh(d.v_);
}
template <>
inline __le16 to_disk<__le16, uint16_t>(uint16_t const &v) {
return __le16(htole16(v));
}
// 32 bit
template <>
inline uint32_t to_cpu<uint32_t, __le32>(__le32 const &d) {
return le32toh(d.v_);
}
template <>
inline __le32 to_disk<__le32, uint32_t>(uint32_t const &v) {
return __le32(htole32(v));
}
// 64 bit
template <>
inline uint64_t to_cpu<uint64_t, __le64>(__le64 const &d) {
return le64toh(d.v_);
}
template <>
inline __le64 to_disk<__le64, uint64_t>(uint64_t const &v) {
return __le64(htole64(v));
}
//--------------------------------
// Bit operations on a bitmap held at |bits|; |b| is the index of
// the bit to test, set or clear.
bool test_bit_le(void const *bits, unsigned b);
void set_bit_le(void *bits, unsigned b);
void clear_bit_le(void *bits, unsigned b);
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,112 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "error_set.h"
#include <iostream>
using namespace persistent_data;
using namespace std;
//----------------------------------------------------------------
// Creates an error with description |err| and no children.
error_set::error_set(string const &err)
	: err_(err)
{
}
// The description this error was constructed with.
string const &
error_set::get_description() const
{
	return this->err_;
}
// The child errors, in the order they were added.
list<error_set::ptr> const &
error_set::get_children() const
{
	return this->children_;
}
// Appends |err| to the list of children.
void
error_set::add_child(error_set::ptr err)
{
	children_.insert(children_.end(), err);
}
// Appends the contained error set to the children, if there is one.
// An empty optional is ignored.
void
error_set::add_child(boost::optional<error_set::ptr> maybe_errs)
{
	if (!maybe_errs)
		return;

	children_.push_back(*maybe_errs);
}
// Convenience overload: wraps |err| in a new, childless error_set and
// appends that.
void
error_set::add_child(string const &err)
{
	add_child(error_set::ptr(new error_set(err)));
}
// True if no child errors have been added.
bool
error_set::empty() const
{
	return children_.empty();
}
//--------------------------------
namespace {
	// Writes |indent| spaces to |out|.
	void indent_by(ostream &out, unsigned indent) {
		while (indent > 0) {
			out << ' ';
			--indent;
		}
	}

	// Prints |e|'s description on a line of its own, indented by
	// |indent| spaces, followed by its children indented a further 2
	// spaces each.  At most |depth| levels of the hierarchy are
	// printed; a depth of 0 prints nothing.
	void print_errs(ostream &out, error_set::ptr e, unsigned depth, unsigned indent) {
		typedef list<error_set::ptr>::const_iterator child_iterator;

		if (depth == 0)
			return;

		indent_by(out, indent);
		out << e->get_description() << endl;

		if (depth <= 1)
			return;

		list<error_set::ptr> const &children = e->get_children();
		child_iterator const last = children.end();
		for (child_iterator child = children.begin(); child != last; ++child)
			print_errs(out, *child, depth - 1, indent + 2);
	}
}
// Remembers which error set to print and how many levels of it to show.
error_selector::error_selector(error_set::ptr errs, unsigned depth)
	: errs_(errs), depth_(depth)
{
}
// Prints the selected errors to |out|, down to the depth chosen at
// construction, starting with no indentation.
void
error_selector::print(ostream &out) const
{
	unsigned const initial_indent = 0;
	print_errs(out, errs_, depth_, initial_indent);
}
// Stream insertion for error_selector; simply forwards to print() and
// returns the stream so insertions can be chained.
ostream &
persistent_data::operator << (ostream &out, error_selector const &errs)
{
	errs.print(out);

	return out;
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,71 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef ERROR_SET_H
#define ERROR_SET_H
#include <boost/optional.hpp>
#include <boost/shared_ptr.hpp>
#include <list>
#include <iosfwd>
#include <string>
//----------------------------------------------------------------
namespace persistent_data {
// When checking the metadata for a thin device we don't want to
// stop at the first error. Instead we should collect as much
// information as possible. The errors are hierarchical, so the
// user can control how much detail is displayed.
class error_set {
public:
typedef boost::shared_ptr<error_set> ptr;
// Builds a node described by 'err'.
error_set(std::string const &err);
// Accessors for this node's description and its child errors.
// NOTE(review): presumably the constructor's 'err' and the nodes
// handed to add_child(); the definitions are not in this header.
std::string const &get_description() const;
std::list<error_set::ptr> const &get_children() const;
// Attach a child error.  The std::string overload wraps the text in
// a new error_set first.  NOTE(review): the optional overload
// presumably ignores an unset value -- confirm in error_set.cc.
void add_child(error_set::ptr err);
void add_child(boost::optional<error_set::ptr> maybe_errs);
void add_child(std::string const &err);
// True when no child errors have been added.
bool empty() const;
private:
std::string err_;
std::list<error_set::ptr> children_;
};
// The error_selector is a little proxy class used when printing
// errors to a stream.  It pairs an error tree with the number of
// levels of that tree to print.
class error_selector {
public:
// 'depth' is the maximum number of levels print() will emit.
error_selector(error_set::ptr errs, unsigned depth);
// Writes the selected levels of the error tree to 'out'.
void print(std::ostream &out) const;
private:
error_set::ptr errs_;
unsigned depth_;
};
// Stream insertion for error_selector; lets a selector be written with
// the usual 'out << selector' syntax.
std::ostream &operator << (std::ostream &out, error_selector const &errs);
}
//----------------------------------------------------------------
#endif

37
persistent-data/errors.h Normal file
View File

@@ -0,0 +1,37 @@
// Copyright (C) 2012 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef THINP_EXCEPTION_H
#define THINP_EXCEPTION_H
#include <stdexcept>
//----------------------------------------------------------------
namespace base {
	// Exception type for metadata that fails validation.  It carries
	// nothing beyond the message held by std::runtime_error, so callers
	// can catch it either specifically or as a std::runtime_error.
	class checksum_error : public std::runtime_error {
	public:
		// 'msg' becomes the text reported by what().
		explicit checksum_error(std::string const &msg)
			: std::runtime_error(msg) {
		}
	};
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,42 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "hex_dump.h"
#include <iostream>
#include <iomanip>
using namespace std;
//----------------------------------------------------------------
// Writes 'len' bytes starting at 'data_' to 'out' as two-digit,
// zero-padded hexadecimal values, each followed by a space, with at
// most 16 bytes per line.  Nothing is written when 'len' is zero.
//
// The stream's format flags and fill character are put back before
// returning, so the hex base and '0' fill used here do not leak into
// whatever the caller prints next.
void base::hex_dump(ostream &out, void const *data_, size_t len)
{
	unsigned char const *data = static_cast<unsigned char const *>(data_),
		*end = data + len;

	// Both of these are sticky stream state, so remember the caller's
	// settings.  fill() returns the previous fill character.
	ios_base::fmtflags const saved_flags = out.flags();
	char const saved_fill = out.fill('0');

	out << hex;
	while (data < end) {
		for (unsigned i = 0; i < 16 && data < end; i++, data++)
			out << setw(2) << (unsigned) *data << " ";
		out << endl;
	}

	out.flags(saved_flags);
	out.fill(saved_fill);
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,32 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef HEX_DUMP_H
#define HEX_DUMP_H
#include <iosfwd>
//----------------------------------------------------------------
namespace base {
// Writes the 'len' bytes starting at 'data' to 'out' as hexadecimal
// text, 16 bytes per line.
void hex_dump(std::ostream &out, void const *data, size_t len);
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,41 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef THINP_MATH_H
#define THINP_MATH_H
//----------------------------------------------------------------
namespace base {
	// Integer division that rounds towards positive infinity.
	//
	// Only works for integral types, and 'divisor' must be positive.
	// The quotient is adjusted after dividing, rather than adding
	// (divisor - 1) to 'v' beforehand, so a 'v' close to the maximum
	// of T cannot overflow.
	template <typename T>
	T div_up(T const &v, T const &divisor) {
		T result = v / divisor;

		// A positive remainder means the truncated quotient is
		// below the exact value, so round it up.
		if (v % divisor > T(0))
			++result;

		return result;
	}

	// Seemingly pointless function, but it coerces the arguments
	// nicely.
	template <typename T>
	T div_down(T const &v, T const &divisor) {
		return v / divisor;
	}
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,72 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef RUN_LIST_H
#define RUN_LIST_H
#include <set>
//----------------------------------------------------------------
namespace base {
	// A run of keys from b_ (inclusive) up to e_ (exclusive).  Runs are
	// ordered by their start only, which is what lets them live in a
	// std::set keyed on b_.
	template <typename T>
	struct run {
		run(T const &b, T const &e)
			: b_(b),
			  e_(e) {
		}

		bool operator< (run const &rhs) const {
			return b_ < rhs.b_;
		}

		T b_, e_;
	};

	// A set of runs with an optional logical inversion: once invert()
	// has been called an odd number of times, in_run() reports the
	// complement of the stored runs.
	template <typename T>
	class run_list {
	public:
		run_list()
			: invert_(false) {
		}

		void add_run(T const &b, T const &e);
		void sub_run(T const &b, T const &e);
		bool in_run(T const &key) const;

		void invert();
		void add(run_list<T> const &rl);
		void sub(run_list<T> const &rl);

		// 'typename' is required: the iterator type is a dependent
		// name because it hangs off std::set<run<T> >.
		typedef typename std::set<run<T> >::const_iterator const_iterator;
		const_iterator begin() const;
		const_iterator end() const;

	private:
		// Membership test on the stored runs, ignoring invert_.
		bool in_run_(T const &key) const;

		bool invert_;
		std::set<run<T> > runs_;
	};
}
//----------------------------------------------------------------
#include "run_list.tcc"
#endif

View File

@@ -0,0 +1,179 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include <iostream> // FIXME: remove
#include <boost/optional.hpp>
//----------------------------------------------------------------
namespace {
	using namespace base;
	using namespace boost;
	using namespace std;

	// Overlap test for two runs where lhs is known to start no later
	// than rhs: they overlap when rhs starts before lhs ends.  Runs
	// that merely touch (rhs.b_ == lhs.e_) do not count as overlapping.
	template <typename T>
	bool overlaps_ordered(run<T> const &lhs, run<T> const &rhs) {
		return rhs.b_ < lhs.e_;
	}

	// Overlap test for two runs in either order.
	template <typename T>
	bool overlaps(run<T> const &lhs, run<T> const &rhs) {
		if (lhs.b_ <= rhs.b_)
			return overlaps_ordered(lhs, rhs);
		else
			return overlaps_ordered(rhs, lhs);
	}

	// Merges two runs where lhs starts no later than rhs.  Returns
	// the union when they overlap, and an empty optional otherwise.
	template <typename T>
	boost::optional<run<T> >
	merge_ordered_runs_if_overlapping(run<T> const &lhs, run<T> const &rhs) {
		typedef boost::optional<run<T> > result;

		if (!overlaps_ordered(lhs, rhs))
			return result();

		// rhs sticks out past the end of lhs: extend lhs.
		if (lhs.e_ < rhs.e_)
			return result(run<T>(lhs.b_, rhs.e_));

		// rhs lies entirely within lhs, so lhs already is the union.
		return result(lhs);
	}

	// Merges two runs given in either order; see
	// merge_ordered_runs_if_overlapping().
	template <typename T>
	boost::optional<run<T> >
	merge_if_overlapping(run<T> const &lhs, run<T> const &rhs) {
		if (lhs.b_ <= rhs.b_)
			return merge_ordered_runs_if_overlapping(lhs, rhs);
		else
			return merge_ordered_runs_if_overlapping(rhs, lhs);
	}

	// Returns the range [first, last) of consecutive entries of
	// 'runs' that overlap 'r', or (end, end) when none does.
	template <typename T>
	pair<typename set<run<T> >::const_iterator,
	     typename set<run<T> >::const_iterator>
	overlapping_range(set<run<T> > const &runs, run<T> const &r) {
		// FIXME: slow, but correct implementation first
		typedef typename set<run<T> >::const_iterator cit;

		for (cit b = runs.begin(); b != runs.end(); ++b) {
			if (overlaps(*b, r)) {
				cit e = b;
				++e;

				// Stop at the end of the set: the last
				// overlapping run may also be the last entry.
				while (e != runs.end() && overlaps(*e, r))
					++e;

				return make_pair(b, e);
			}
		}

		return make_pair(runs.end(), runs.end());
	}
}
//----------------------------------------------------------------
// Adds the run [b, e) to the list.  The stored runs that
// overlapping_range() reports for the new run are folded into it with
// merge_if_overlapping(), erased from the set, and replaced by the
// single resulting run.
template <typename T>
void
run_list<T>::add_run(T const &b, T const &e)
{
	using namespace std;
	typedef typename set<run<T> >::const_iterator run_iterator;

	run<T> merged(b, e);
	pair<run_iterator, run_iterator> hits = overlapping_range(runs_, merged);

	run_iterator cursor = hits.first;
	while (cursor != hits.second) {
		optional<run<T> > combined = merge_if_overlapping(merged, *cursor);
		if (combined)
			merged = *combined;
		++cursor;
	}

	runs_.erase(hits.first, hits.second);
	runs_.insert(merged);
}
// Not implemented yet: the body is empty, so calling this has no
// effect.  NOTE(review): presumably meant to remove the run [b, e)
// from the list.
template <typename T>
void
run_list<T>::sub_run(T const &b, T const &e)
{
// FIXME: finish
}
// Reports whether 'key' falls inside one of the stored runs, ignoring
// the invert_ flag.
template <typename T>
bool
run_list<T>::in_run_(T const &key) const
{
	using namespace std;

	// Only b_ takes part in the ordering of runs, so a degenerate
	// [key, key) probe is enough for the lookup.  It also avoids
	// computing key + 1, which could overflow.
	run<T> probe(key, key);
	typename set<run<T> >::const_iterator it = runs_.lower_bound(probe);

	// A run that starts exactly at key contains it.
	if (it != runs_.end() && it->b_ == key)
		return true;

	// Every stored run starts after key (or the set is empty), so
	// there is no predecessor to step back to.
	if (it == runs_.begin())
		return false;

	// Otherwise key can only be covered by the last run that starts
	// before it.
	--it;
	return it->b_ <= key && it->e_ > key;
}
// Membership test that honours the invert_ flag: gives the answer of
// in_run_() for a normal list, and its negation for an inverted one.
template <typename T>
bool
run_list<T>::in_run(T const &key) const
{
	bool const hit = in_run_(key);
	return invert_ ? !hit : hit;
}
// Toggles the invert_ flag.  The stored runs are left untouched;
// in_run() negates its answer while the flag is set.
template <typename T>
void
run_list<T>::invert()
{
invert_ = !invert_;
}
// Not implemented yet: the body is empty, so calling this has no
// effect.  NOTE(review): presumably meant to add every run of 'rl'
// to this list.
template <typename T>
void
run_list<T>::add(run_list<T> const &rl)
{
// FIXME: finish
}
// Not implemented yet: the body is empty, so calling this has no
// effect.  NOTE(review): presumably meant to remove every run of 'rl'
// from this list.
template <typename T>
void
run_list<T>::sub(run_list<T> const &rl)
{
// FIXME: finish
}
template <typename T>
const_iterator
run_list<T>::begin() const
{
return runs_.begin();
}
const_iterator
run_list<T>::end() const
{
return runs_.end();
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,41 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "space_map.h"
using namespace persistent_data;
//----------------------------------------------------------------
// Remembers the space map and block whose count the destructor will
// decrement; starts out armed (released_ is false).
sm_decrementer::sm_decrementer(space_map::ptr sm, block_address b)
: sm_(sm),
b_(b),
released_(false) {
}
// Decrements the count of block b_ in the space map, unless
// dont_bother() was called beforehand.
sm_decrementer::~sm_decrementer() {
	if (released_)
		return;

	sm_->dec(b_);
}
// Disarms the decrementer: once called, the destructor leaves the
// block's count alone.
void
sm_decrementer::dont_bother() {
released_ = true;
}
//----------------------------------------------------------------

140
persistent-data/space_map.h Normal file
View File

@@ -0,0 +1,140 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef SPACE_MAP_H
#define SPACE_MAP_H
#include "block.h"
#include "block_counter.h"
#include <boost/shared_ptr.hpp>
#include <boost/optional.hpp>
//----------------------------------------------------------------
namespace persistent_data {
// Reference count held for a single block.
typedef uint32_t ref_t;
// Abstract interface for a space map: a reference count per block
// address, plus allocation of blocks whose count is zero.
class space_map {
public:
typedef boost::shared_ptr<space_map> ptr;
virtual ~space_map() {};
// Total number of blocks tracked, and how many of them are free.
virtual block_address get_nr_blocks() const = 0;
virtual block_address get_nr_free() const = 0;
// Read / overwrite the reference count of block 'b'.
virtual ref_t get_count(block_address b) const = 0;
virtual void set_count(block_address b, ref_t c) = 0;
virtual void commit() = 0;
// Adjust the reference count of block 'b' by one.
virtual void inc(block_address b) = 0;
virtual void dec(block_address b) = 0;
// Allocation returns an optional rather than throwing: failure is
// not that rare if we're restricting the area that's
// searched.
typedef boost::optional<block_address> maybe_block;
virtual maybe_block new_block() = 0;
virtual maybe_block new_block(block_address begin, block_address end) = 0;
virtual bool count_possibly_greater_than_one(block_address b) const = 0;
// Grows the map by 'extra_blocks' blocks.
virtual void extend(block_address extra_blocks) = 0;
// Callback interface for iterate(): receives a block address and
// its reference count.
struct iterator {
virtual ~iterator() {}
virtual void operator() (block_address b, ref_t c) = 0;
};
// Optional operation; this default implementation always throws.
virtual void iterate(iterator &it) const {
throw std::runtime_error("not implemented");
}
};
// A space map that can also export a "root": a blob of root_size()
// bytes that copy_root() writes into a caller-supplied buffer.
class persistent_space_map : public space_map {
public:
typedef boost::shared_ptr<persistent_space_map> ptr;
// Number of bytes copy_root() needs.
virtual size_t root_size() const = 0;
// Writes the root into 'dest', which is 'len' bytes long.
virtual void copy_root(void *dest, size_t len) const = 0;
};
// A persistent space map that additionally supports a consistency
// check against a block_counter, and cloning.
class checked_space_map : public persistent_space_map {
public:
typedef boost::shared_ptr<checked_space_map> ptr;
// Optional operation; this default implementation always throws.
virtual void check(block_counter &counter) const {
throw std::runtime_error("not implemented");
}
// Returns a new space map object; see the implementations for what
// state the clone shares with the original.
virtual ptr clone() const = 0;
};
// Scoped adjustment of a block's reference count.  The constructor
// applies 'delta' to block 'b'; the destructor applies the opposite
// adjustment unless release() has been called in between, in which
// case the change is kept.
class sm_adjust {
public:
	sm_adjust(space_map::ptr sm, block_address b, int delta)
		: sm_(sm),
		  b_(b),
		  delta_(delta) {
		adjust_count(delta_);
	}

	~sm_adjust() {
		adjust_count(-delta_);
	}

	// Makes the adjustment permanent: the destructor then does nothing.
	void release() {
		delta_ = 0;
	}

private:
	void adjust_count(int delta) {
		// Nothing to do.  In particular a released sm_adjust must
		// not touch the space map from its destructor at all.
		if (delta == 0)
			return;

		if (delta == 1)
			sm_->inc(b_);

		else if (delta == -1)
			sm_->dec(b_);

		else
			sm_->set_count(b_, sm_->get_count(b_) + delta);
	}

	space_map::ptr sm_;
	block_address b_;
	int delta_;
};
// Scope guard that decrements the reference count of block 'b' in 'sm'
// when it is destroyed, unless dont_bother() has been called first.
class sm_decrementer {
public:
sm_decrementer(space_map::ptr sm, block_address b);
~sm_decrementer();
// Cancels the pending decrement.
void dont_bother();
private:
space_map::ptr sm_;
block_address b_;
bool released_;
};
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,104 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef CORE_MAP_H
#define CORE_MAP_H
#include "space_map.h"
//----------------------------------------------------------------
namespace persistent_data {
class core_map : public space_map {
public:
core_map(block_address nr_blocks)
: counts_(nr_blocks, 0),
nr_free_(nr_blocks) {
}
block_address get_nr_blocks() const {
return counts_.size();
}
block_address get_nr_free() const {
return nr_free_;
}
ref_t get_count(block_address b) const {
return counts_[b];
}
void set_count(block_address b, ref_t c) {
if (counts_[b] == 0 && c > 0)
nr_free_--;
else if (counts_[b] > 0 && c == 0)
nr_free_++;
counts_[b] = c;
}
void commit() {
}
void inc(block_address b) {
if (counts_[b] == 0)
nr_free_--;
counts_[b]++;
}
void dec(block_address b) {
counts_[b]--;
if (counts_[b] == 0)
nr_free_++;
}
maybe_block new_block() {
return new_block(0, counts_.size());
}
maybe_block new_block(block_address begin, block_address end) {
for (block_address i = begin; i < std::min<block_address>(end, counts_.size()); i++)
if (counts_[i] == 0) {
counts_[i] = 1;
nr_free_--;
return i;
}
return maybe_block();
}
bool count_possibly_greater_than_one(block_address b) const {
return counts_[b] > 1;
}
void extend(block_address extra_blocks) {
throw std::runtime_error("not implemented");
}
private:
std::vector<ref_t> counts_;
unsigned nr_free_;
};
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,725 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "space_map_disk.h"
#include "checksum.h"
#include "endian_utils.h"
#include "math_utils.h"
#include "space_map_disk_structures.h"
#include "space_map_recursive.h"
#include "space_map_transactional.h"
#include "transaction_manager.h"
using namespace boost;
using namespace persistent_data;
using namespace std;
using namespace sm_disk_detail;
//----------------------------------------------------------------
namespace {
// Seed handed to the crc32c of space map bitmap blocks.
uint64_t const BITMAP_CSUM_XOR = 240779;

// Validator for space map bitmap blocks.  check() verifies the stored
// checksum and block number of a block that has been read; prepare()
// fills both fields in before a block is written.
//
// The checksum covers MD_BLOCK_SIZE - sizeof(uint32_t) bytes starting
// at the header's 'not_used' field.  NOTE(review): this presumably
// means everything after a leading 32-bit csum field -- confirm
// against the bitmap_header layout.
struct bitmap_block_validator : public block_manager<>::validator {
	virtual void check(buffer<> const &b, block_address location) const {
		// Interpret the buffer's bytes, not the buffer object.
		bitmap_header const *data = reinterpret_cast<bitmap_header const *>(b.raw());
		crc32c sum(BITMAP_CSUM_XOR);
		sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t));
		if (sum.get_sum() != to_cpu<uint32_t>(data->csum))
			throw checksum_error("bad checksum in space map bitmap");

		if (to_cpu<uint64_t>(data->blocknr) != location)
			throw checksum_error("bad block nr in space map bitmap");
	}

	virtual void prepare(buffer<> &b, block_address location) const {
		// Interpret the buffer's bytes, not the buffer object.
		bitmap_header *data = reinterpret_cast<bitmap_header *>(b.raw());

		// The block number must be stored first: it lies inside
		// the checksummed region.
		data->blocknr = to_disk<base::__le64, uint64_t>(location);

		crc32c sum(BITMAP_CSUM_XOR);
		sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t));
		data->csum = to_disk<base::__le32>(sum.get_sum());
	}
};
// Creates a fresh validator for space map bitmap blocks.
block_manager<>::validator::ptr
bitmap_validator() {
	block_manager<>::validator::ptr v(new bitmap_block_validator());
	return v;
}
//--------------------------------
// Seed handed to the crc32c of metadata index blocks.
uint64_t const INDEX_CSUM_XOR = 160478;

// FIXME: factor out the common code in these validators
//
// Validator for metadata index blocks.  check() verifies the stored
// checksum and block number of a block that has been read; prepare()
// fills both fields in before a block is written.
//
// The checksum covers MD_BLOCK_SIZE - sizeof(uint32_t) bytes starting
// at the 'padding_' field.  NOTE(review): this presumably means
// everything after a leading 32-bit csum_ field -- confirm against the
// metadata_index layout.
struct index_block_validator : public block_manager<>::validator {
	virtual void check(buffer<> const &b, block_address location) const {
		// Interpret the buffer's bytes, not the buffer object.
		metadata_index const *mi = reinterpret_cast<metadata_index const *>(b.raw());
		crc32c sum(INDEX_CSUM_XOR);
		sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t));
		if (sum.get_sum() != to_cpu<uint32_t>(mi->csum_))
			throw checksum_error("bad checksum in metadata index block");

		if (to_cpu<uint64_t>(mi->blocknr_) != location)
			throw checksum_error("bad block nr in metadata index block");
	}

	virtual void prepare(buffer<> &b, block_address location) const {
		// Interpret the buffer's bytes, not the buffer object.
		metadata_index *mi = reinterpret_cast<metadata_index *>(b.raw());

		// The block number must be stored first: it lies inside
		// the checksummed region.
		mi->blocknr_ = to_disk<base::__le64, uint64_t>(location);

		crc32c sum(INDEX_CSUM_XOR);
		sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t));
		mi->csum_ = to_disk<base::__le32>(sum.get_sum());
	}
};
// Creates a fresh validator for metadata index blocks.
block_manager<>::validator::ptr
index_validator() {
	block_manager<>::validator::ptr v(new index_block_validator());
	return v;
}
//--------------------------------
class bitmap {
public:
typedef transaction_manager::read_ref read_ref;
typedef transaction_manager::write_ref write_ref;
bitmap(transaction_manager::ptr tm,
index_entry const &ie)
: tm_(tm),
ie_(ie) {
}
ref_t lookup(unsigned b) const {
read_ref rr = tm_->read_lock(ie_.blocknr_, bitmap_validator());
void const *bits = bitmap_data(rr);
ref_t b1 = test_bit_le(bits, b * 2);
ref_t b2 = test_bit_le(bits, b * 2 + 1);
ref_t result = b2 ? 1 : 0;
result |= b1 ? 0b10 : 0;
return result;
}
void insert(unsigned b, ref_t n) {
write_ref wr = tm_->shadow(ie_.blocknr_, bitmap_validator()).first;
void *bits = bitmap_data(wr);
bool was_free = !test_bit_le(bits, b * 2) && !test_bit_le(bits, b * 2 + 1);
if (n == 1 || n == 3)
set_bit_le(bits, b * 2 + 1);
else
clear_bit_le(bits, b * 2 + 1);
if (n == 2 || n == 3)
set_bit_le(bits, b * 2);
else
clear_bit_le(bits, b * 2);
ie_.blocknr_ = wr.get_location();
if (was_free && n > 0) {
ie_.nr_free_--;
if (b == ie_.none_free_before_)
ie_.none_free_before_++;
}
if (!was_free && n == 0) {
ie_.nr_free_++;
if (b < ie_.none_free_before_)
ie_.none_free_before_ = b;
}
}
boost::optional<unsigned> find_free(unsigned begin, unsigned end) {
for (unsigned i = max(begin, ie_.none_free_before_); i < end; i++) {
if (lookup(i) == 0) {
insert(i, 1);
ie_.none_free_before_ = i + 1;
return boost::optional<unsigned>(i);
}
}
return boost::optional<unsigned>();
}
index_entry const &get_ie() const {
return ie_;
}
void iterate(block_address offset, block_address hi, space_map::iterator &it) const {
read_ref rr = tm_->read_lock(ie_.blocknr_, bitmap_validator());
void const *bits = bitmap_data(rr);
for (unsigned b = 0; b < hi; b++) {
ref_t b1 = test_bit_le(bits, b * 2);
ref_t b2 = test_bit_le(bits, b * 2 + 1);
ref_t result = b2 ? 1 : 0;
result |= b1 ? 0b10 : 0;
it(offset + b, result);
}
}
private:
void *bitmap_data(transaction_manager::write_ref &wr) {
bitmap_header *h = reinterpret_cast<bitmap_header *>(&wr.data()[0]);
return h + 1;
}
void const *bitmap_data(transaction_manager::read_ref &rr) const {
bitmap_header const *h = reinterpret_cast<bitmap_header const *>(&rr.data()[0]);
return h + 1;
}
transaction_manager::ptr tm_;
index_entry ie_;
};
// Value traits for a btree of reference counts: a uint32_t in core,
// stored on disk as an __le32, with a no-op reference counter for the
// values themselves.
struct ref_count_traits {
typedef __le32 disk_type;
typedef uint32_t value_type;
typedef NoOpRefCounter<uint32_t> ref_counter;
// Converts the on-disk value 'd' into the in-core value 'v'.
static void unpack(disk_type const &d, value_type &v) {
v = to_cpu<value_type>(d);
}
// Converts the in-core value 'v' into the on-disk value 'd'.
static void pack(value_type const &v, disk_type &d) {
d = to_disk<disk_type>(v);
}
};
// btree_checker specialised for a single-level btree of reference
// counts.  It adds nothing beyond forwarding the block_counter to the
// base class.
class ref_count_checker : public btree_checker<1, ref_count_traits> {
public:
typedef boost::shared_ptr<ref_count_checker> ptr;
ref_count_checker(block_counter &counter)
: btree_checker<1, ref_count_traits>(counter) {
}
};
// Abstract storage for the index entries that describe the bitmap
// blocks of a disk space map, one index_entry per bitmap.
class index_store {
public:
	typedef boost::shared_ptr<index_store> ptr;

	// Implementations are owned and destroyed through index_store
	// pointers, so the destructor has to be virtual.
	virtual ~index_store() {}

	virtual void resize(block_address nr_indexes) = 0;

	// Fetch / store the index entry of bitmap number 'b'.
	virtual index_entry find_ie(block_address b) const = 0;
	virtual void save_ie(block_address b, struct index_entry ie) = 0;

	virtual void commit_ies() = 0;
	virtual ptr clone() const = 0;
	virtual block_address get_root() const = 0;
	virtual void check(block_counter &counter, block_address nr_index_entries) const = 0;
};
unsigned const ENTRIES_PER_BLOCK = (MD_BLOCK_SIZE - sizeof(bitmap_header)) * 4;
class sm_disk : public checked_space_map {
public:
typedef boost::shared_ptr<sm_disk> ptr;
typedef transaction_manager::read_ref read_ref;
typedef transaction_manager::write_ref write_ref;
sm_disk(index_store::ptr indexes,
transaction_manager::ptr tm)
: tm_(tm),
indexes_(indexes),
nr_blocks_(0),
nr_allocated_(0),
ref_counts_(tm_, ref_count_traits::ref_counter()) {
}
sm_disk(index_store::ptr indexes,
transaction_manager::ptr tm,
sm_root const &root)
: tm_(tm),
indexes_(indexes),
nr_blocks_(root.nr_blocks_),
nr_allocated_(root.nr_allocated_),
ref_counts_(tm_, root.ref_count_root_, ref_count_traits::ref_counter()) {
}
block_address get_nr_blocks() const {
return nr_blocks_;
}
block_address get_nr_free() const {
return nr_blocks_ - nr_allocated_;
}
ref_t get_count(block_address b) const {
ref_t count = lookup_bitmap(b);
if (count == 3)
return lookup_ref_count(b);
return count;
}
void set_count(block_address b, ref_t c) {
ref_t old = get_count(b);
if (c == old)
return;
if (c > 2) {
if (old < 3)
insert_bitmap(b, 3);
insert_ref_count(b, c);
} else {
if (old > 2)
remove_ref_count(b);
insert_bitmap(b, c);
}
if (old == 0)
nr_allocated_++;
else if (c == 0)
nr_allocated_--;
}
void commit() {
indexes_->commit_ies();
}
void inc(block_address b) {
// FIXME: 2 get_counts
ref_t old = get_count(b);
set_count(b, old + 1);
}
void dec(block_address b) {
ref_t old = get_count(b);
set_count(b, old - 1);
}
maybe_block new_block() {
// FIXME: keep track of the lowest free block so we
// can start searching from a suitable place.
return new_block(0, nr_blocks_);
}
maybe_block new_block(block_address begin, block_address end) {
block_address begin_index = begin / ENTRIES_PER_BLOCK;
block_address end_index = div_up<block_address>(end, ENTRIES_PER_BLOCK);
for (block_address index = begin_index; index < end_index; index++) {
index_entry ie = indexes_->find_ie(index);
bitmap bm(tm_, ie);
optional<unsigned> maybe_b = bm.find_free((index == begin_index) ? (begin % ENTRIES_PER_BLOCK) : 0,
(index == end_index - 1) ? (end % ENTRIES_PER_BLOCK) : ENTRIES_PER_BLOCK);
if (maybe_b) {
block_address b = *maybe_b;
indexes_->save_ie(index, bm.get_ie());
nr_allocated_++;
b = (index * ENTRIES_PER_BLOCK) + b;
assert(get_count(b) == 1);
return b;
}
}
return maybe_block();
}
bool count_possibly_greater_than_one(block_address b) const {
return get_count(b) > 1;
}
virtual void extend(block_address extra_blocks) {
block_address nr_blocks = nr_blocks_ + extra_blocks;
block_address bitmap_count = div_up<block_address>(nr_blocks, ENTRIES_PER_BLOCK);
block_address old_bitmap_count = div_up<block_address>(nr_blocks_, ENTRIES_PER_BLOCK);
indexes_->resize(bitmap_count);
for (block_address i = old_bitmap_count; i < bitmap_count; i++) {
write_ref wr = tm_->new_block(bitmap_validator());
index_entry ie;
ie.blocknr_ = wr.get_location();
ie.nr_free_ = i == (bitmap_count - 1) ?
(nr_blocks % ENTRIES_PER_BLOCK) : ENTRIES_PER_BLOCK;
ie.none_free_before_ = 0;
indexes_->save_ie(i, ie);
}
nr_blocks_ = nr_blocks;
}
virtual void check(block_counter &counter) const {
ref_count_checker::ptr v(new ref_count_checker(counter));
ref_counts_.visit(v);
block_address nr_entries = div_up<block_address>(get_nr_blocks(), ENTRIES_PER_BLOCK);
indexes_->check(counter, nr_entries);
}
struct look_aside_iterator : public iterator {
look_aside_iterator(sm_disk const &smd, iterator &it)
: smd_(smd),
it_(it) {
}
virtual void operator () (block_address b, ref_t c) {
it_(b, c == 3 ? smd_.lookup_ref_count(b) : c);
}
sm_disk const &smd_;
iterator &it_;
};
friend struct look_aside_iterator;
virtual void iterate(iterator &it) const {
look_aside_iterator wrapper(*this, it);
unsigned nr_indexes = div_up<block_address>(nr_blocks_, ENTRIES_PER_BLOCK);
for (unsigned i = 0; i < nr_indexes; i++) {
unsigned hi = (i == nr_indexes - 1) ? (nr_blocks_ % ENTRIES_PER_BLOCK) : ENTRIES_PER_BLOCK;
index_entry ie = indexes_->find_ie(i);
bitmap bm(tm_, ie);
bm.iterate(i * ENTRIES_PER_BLOCK, hi, wrapper);
}
}
virtual size_t root_size() const {
return sizeof(sm_root_disk);
}
virtual void copy_root(void *dest, size_t len) const {
sm_root_disk d;
sm_root v;
if (len < sizeof(d))
throw runtime_error("root too small");
v.nr_blocks_ = sm_disk::get_nr_blocks();
v.nr_allocated_ = sm_disk::get_nr_allocated();
v.bitmap_root_ = get_index_store()->get_root();
v.ref_count_root_ = sm_disk::get_ref_count_root();
sm_root_traits::pack(v, d);
::memcpy(dest, &d, sizeof(d));
}
virtual checked_space_map::ptr clone() const {
sm_root root;
root.nr_blocks_ = nr_blocks_;
root.nr_allocated_ = nr_allocated_;
root.bitmap_root_ = indexes_->get_root();
root.ref_count_root_ = ref_counts_.get_root();
return checked_space_map::ptr(
new sm_disk(indexes_->clone(), tm_, root));
}
protected:
transaction_manager::ptr get_tm() const {
return tm_;
}
block_address get_nr_allocated() const {
return nr_allocated_;
}
block_address get_ref_count_root() const {
return ref_counts_.get_root();
}
index_store::ptr get_index_store() const {
return indexes_;
}
private:
ref_t lookup_bitmap(block_address b) const {
index_entry ie = indexes_->find_ie(b / ENTRIES_PER_BLOCK);
bitmap bm(tm_, ie);
return bm.lookup(b % ENTRIES_PER_BLOCK);
}
void insert_bitmap(block_address b, unsigned n) {
if (n > 3)
throw runtime_error("bitmap can only hold 2 bit values");
index_entry ie = indexes_->find_ie(b / ENTRIES_PER_BLOCK);
bitmap bm(tm_, ie);
bm.insert(b % ENTRIES_PER_BLOCK, n);
indexes_->save_ie(b / ENTRIES_PER_BLOCK, bm.get_ie());
}
ref_t lookup_ref_count(block_address b) const {
uint64_t key[1] = {b};
optional<ref_t> mvalue = ref_counts_.lookup(key);
if (!mvalue)
throw runtime_error("ref count not in tree");
return *mvalue;
}
void insert_ref_count(block_address b, ref_t count) {
uint64_t key[1] = {b};
ref_counts_.insert(key, count);
}
void remove_ref_count(block_address b) {
uint64_t key[1] = {b};
ref_counts_.remove(key);
}
transaction_manager::ptr tm_;
index_store::ptr indexes_;
block_address nr_blocks_;
block_address nr_allocated_;
btree<1, ref_count_traits> ref_counts_;
};
// btree_checker for the tree of index entries.  On top of the base
// class checks it records every key it sees, rejects duplicates, and
// counts the bitmap block each index entry refers to.
class bitmap_tree_validator : public btree_checker<1, index_entry_traits> {
public:
	typedef boost::shared_ptr<bitmap_tree_validator> ptr;

	bitmap_tree_validator(block_counter &counter)
		: btree_checker<1, index_entry_traits>(counter) {
	}

	bool visit_leaf(unsigned level,
			bool sub_root,
			optional<uint64_t> key,
			btree_detail::node_ref<index_entry_traits> const &n) {
		bool r = btree_checker<1, index_entry_traits>::visit_leaf(level, sub_root, key, n);
		if (!r)
			return r;

		for (unsigned i = 0; i < n.get_nr_entries(); i++) {
			if (seen_indexes_.count(n.key_at(i)) > 0) {
				// Report the duplicated key, not the slot
				// within this leaf.
				ostringstream out;
				out << "index entry " << n.key_at(i) << " is present twice";
				throw runtime_error(out.str());
			}

			seen_indexes_.insert(n.key_at(i));
			btree_checker<1, index_entry_traits>::get_counter().inc(n.value_at(i).blocknr_);
		}

		return true;
	}

	// Throws unless the keys seen so far are exactly 0 .. nr_entries - 1.
	void check_all_index_entries_present(block_address nr_entries) {
		for (block_address i = 0; i < nr_entries; i++) {
			if (seen_indexes_.count(i) == 0) {
				ostringstream out;
				out << "missing index entry " << i;
				throw runtime_error(out.str());
			}
		}

		set<block_address>::const_iterator it;
		for (it = seen_indexes_.begin(); it != seen_indexes_.end(); ++it) {
			if (*it >= nr_entries) {
				ostringstream out;
				out << "unexpected index entry " << *it;
				throw runtime_error(out.str());
			}
		}
	}

private:
	set<block_address> seen_indexes_;
};
class btree_index_store : public index_store {
public:
typedef boost::shared_ptr<btree_index_store> ptr;
btree_index_store(transaction_manager::ptr tm)
: tm_(tm),
bitmaps_(tm, index_entry_traits::ref_counter()) {
}
btree_index_store(transaction_manager::ptr tm,
block_address root)
: tm_(tm),
bitmaps_(tm, root, index_entry_traits::ref_counter()) {
}
virtual void resize(block_address nr_entries) {
// No op
}
virtual index_entry find_ie(block_address ie_index) const {
uint64_t key[1] = {ie_index};
optional<index_entry> mindex = bitmaps_.lookup(key);
if (!mindex)
throw runtime_error("Couldn't lookup bitmap");
return *mindex;
}
virtual void save_ie(block_address ie_index, struct index_entry ie) {
uint64_t key[1] = {ie_index};
bitmaps_.insert(key, ie);
}
virtual void commit_ies() {
// No op
}
virtual index_store::ptr clone() const {
return index_store::ptr(new btree_index_store(tm_, bitmaps_.get_root()));
}
virtual block_address get_root() const {
return bitmaps_.get_root();
}
virtual void check(block_counter &counter, block_address nr_index_entries) const {
bitmap_tree_validator::ptr v(new bitmap_tree_validator(counter));
bitmaps_.visit(v);
v->check_all_index_entries_present(nr_index_entries);
}
private:
transaction_manager::ptr tm_;
btree<1, index_entry_traits> bitmaps_;
};
// index_store implementation that keeps the index entries in memory
// (entries_) and packs them into a single metadata_index block, located
// at bitmap_root_, when commit_ies() is called.
class metadata_index_store : public index_store {
public:
	typedef boost::shared_ptr<metadata_index_store> ptr;

	// Creates an empty store, allocating a new block for the index.
	metadata_index_store(transaction_manager::ptr tm)
		: tm_(tm) {
		block_manager<>::write_ref wr = tm_->new_block(index_validator());
		bitmap_root_ = wr.get_location();
	}

	// Opens the index block at |root| and unpacks its first |nr_indexes|
	// entries into memory.
	metadata_index_store(transaction_manager::ptr tm, block_address root, block_address nr_indexes)
		: tm_(tm),
		  bitmap_root_(root) {
		resize(nr_indexes);
		load_ies();
	}

	// Sets the number of in-memory index entries.
	virtual void resize(block_address nr_indexes) {
		entries_.resize(nr_indexes);
	}

	// Returns the in-memory entry |ie_index|.
	// Throws runtime_error if |ie_index| is not below the current size.
	virtual index_entry find_ie(block_address ie_index) const {
		check_index(ie_index);
		return entries_[ie_index];
	}

	// Overwrites the in-memory entry |ie_index|; nothing reaches the
	// index block until commit_ies().
	// Throws runtime_error if |ie_index| is not below the current size.
	virtual void save_ie(block_address ie_index, struct index_entry ie) {
		check_index(ie_index);
		entries_[ie_index] = ie;
	}

	// Shadows the index block and packs every in-memory entry into it.
	// bitmap_root_ is updated to the location the shadow returns.
	virtual void commit_ies() {
		std::pair<block_manager<>::write_ref, bool> p =
			tm_->shadow(bitmap_root_, index_validator());

		bitmap_root_ = p.first.get_location();
		metadata_index *mdi = reinterpret_cast<metadata_index *>(&p.first.data());

		for (size_t i = 0; i < entries_.size(); i++)
			index_entry_traits::pack(entries_[i], mdi->index[i]);
	}

	// Returns a new store that re-reads the index block at bitmap_root_;
	// entries saved since the last commit_ies() are therefore not copied.
	virtual index_store::ptr clone() const {
		return index_store::ptr(new metadata_index_store(tm_, bitmap_root_, entries_.size()));
	}

	virtual block_address get_root() const {
		return bitmap_root_;
	}

	// Counts the index block and every bitmap block the entries refer to.
	// |nr_index_entries| is not used by this implementation.
	virtual void check(block_counter &counter, block_address nr_index_entries) const {
		counter.inc(bitmap_root_);

		for (size_t i = 0; i < entries_.size(); i++)
			// FIXME: this looks like a hack
			if (entries_[i].blocknr_ != 0) // superblock
				counter.inc(entries_[i].blocknr_);
	}

private:
	// Rejects indexes that would read or write past the end of entries_.
	void check_index(block_address ie_index) const {
		if (ie_index >= entries_.size()) {
			ostringstream out;
			out << "index entry " << ie_index
			    << " out of range (nr entries = " << entries_.size() << ")";
			throw runtime_error(out.str());
		}
	}

	// Unpacks entries_.size() entries from the index block.
	void load_ies() {
		block_manager<>::read_ref rr =
			tm_->read_lock(bitmap_root_, index_validator());

		metadata_index const *mdi = reinterpret_cast<metadata_index const *>(&rr.data());

		for (size_t i = 0; i < entries_.size(); i++)
			index_entry_traits::unpack(*(mdi->index + i), entries_[i]);
	}

	transaction_manager::ptr tm_;
	block_address bitmap_root_;
	std::vector<index_entry> entries_;
};
}
//----------------------------------------------------------------
// Builds a new sm_disk over a btree-backed index store, grows it to
// |nr_blocks| blocks and commits it before handing it back.
checked_space_map::ptr
persistent_data::create_disk_sm(transaction_manager::ptr tm,
				block_address nr_blocks)
{
	index_store::ptr indexes(new btree_index_store(tm));
	checked_space_map::ptr result(new sm_disk(indexes, tm));

	result->extend(nr_blocks);
	result->commit();

	return result;
}
// Opens an existing sm_disk.  |root| must point at sizeof(sm_root_disk)
// readable bytes holding the packed space map root; it is copied out and
// unpacked before use.
checked_space_map::ptr
persistent_data::open_disk_sm(transaction_manager::ptr tm, void *root)
{
	sm_root_disk packed;
	::memcpy(&packed, root, sizeof(packed));

	sm_root unpacked;
	sm_root_traits::unpack(packed, unpacked);

	index_store::ptr indexes(new btree_index_store(tm, unpacked.bitmap_root_));
	return checked_space_map::ptr(new sm_disk(indexes, tm, unpacked));
}
// Builds a new sm_disk over a metadata_index_store, grows it to
// |nr_blocks| blocks, commits it, and returns it wrapped first in a
// recursive space map and then in a transactional one.
checked_space_map::ptr
persistent_data::create_metadata_sm(transaction_manager::ptr tm, block_address nr_blocks)
{
	index_store::ptr indexes(new metadata_index_store(tm));
	checked_space_map::ptr disk(new sm_disk(indexes, tm));

	disk->extend(nr_blocks);
	disk->commit();

	checked_space_map::ptr recursive = create_recursive_sm(disk);
	return create_transactional_sm(recursive);
}
// Opens an existing metadata space map.  |root| must point at
// sizeof(sm_root_disk) readable bytes holding the packed root.  The
// number of index entries to load is derived from the unpacked block
// count, rounded up to whole bitmaps.  The result is wrapped in a
// recursive and then a transactional space map.
checked_space_map::ptr
persistent_data::open_metadata_sm(transaction_manager::ptr tm, void *root)
{
	sm_root_disk packed;
	::memcpy(&packed, root, sizeof(packed));

	sm_root unpacked;
	sm_root_traits::unpack(packed, unpacked);

	block_address nr_indexes = div_up<block_address>(unpacked.nr_blocks_, ENTRIES_PER_BLOCK);
	index_store::ptr indexes(new metadata_index_store(tm, unpacked.bitmap_root_, nr_indexes));

	checked_space_map::ptr disk(new sm_disk(indexes, tm, unpacked));
	checked_space_map::ptr recursive = create_recursive_sm(disk);
	return create_transactional_sm(recursive);
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,43 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef SPACE_MAP_DISK_H
#define SPACE_MAP_DISK_H
#include "btree_checker.h"
#include "space_map.h"
//----------------------------------------------------------------
namespace persistent_data {
// Creates a new on-disk space map whose bitmap index is held in a btree,
// sized to |nr_blocks| and already committed.
checked_space_map::ptr
create_disk_sm(transaction_manager::ptr tm, block_address nr_blocks);
// Opens an on-disk space map.  |root| points at the packed space map
// root (an sm_root_disk), which is copied and unpacked.
checked_space_map::ptr
open_disk_sm(transaction_manager::ptr tm, void *root);
// Creates a new space map whose bitmap index is held in a single
// metadata index block, sized to |nr_blocks| and already committed.
// The result is wrapped in recursive and transactional space maps.
checked_space_map::ptr
create_metadata_sm(transaction_manager::ptr tm, block_address nr_blocks);
// Opens a metadata space map.  |root| points at the packed space map
// root (an sm_root_disk).  The result is wrapped in recursive and
// transactional space maps.
checked_space_map::ptr
open_metadata_sm(transaction_manager::ptr tm, void *root);
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,250 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "persistent-data/space_map_recursive.h"
using namespace persistent_data;
//----------------------------------------------------------------
namespace {
// A deferred space map operation on a single block.
//
// INC and DEC carry no operand; SET carries the reference count to
// store.  The two constructors enforce that pairing and throw
// runtime_error when the wrong one is used.
struct block_op {
	enum op {
		INC,
		DEC,
		SET
	};

	// For INC and DEC.  Throws runtime_error if |o| is SET.
	block_op(op o, block_address b)
		: op_(o),
		  b_(b),
		  rc_(0) {	// unused for INC/DEC, but block_op objects get
				// copied so it must not be left indeterminate
		if (o == SET)
			throw runtime_error("SET must take an operand");
	}

	// For SET, with |rc| the count to store.  Throws runtime_error if
	// |o| is not SET.
	block_op(op o, block_address b, uint32_t rc)
		: op_(o),
		  b_(b),
		  rc_(rc) {
		if (o != SET)
			throw runtime_error("only SET takes an operand");
	}

	op op_;
	block_address b_;
	uint32_t rc_;	// only meaningful when op_ == SET
};
// Space map wrapper that copes with re-entrant calls.
//
// depth_ is non-zero while a call into the wrapped space map is in
// progress.  If inc(), dec() or set_count() is called during that time
// the operation is queued in ops_ instead of being forwarded; queued
// operations are applied by flush_ops() once the outermost mutating call
// finishes.  Methods that cannot be deferred throw runtime_error when
// called re-entrantly (see cant_recurse()).
class sm_recursive : public checked_space_map {
public:
	sm_recursive(checked_space_map::ptr sm)
		: sm_(sm),
		  depth_(0),
		  flushing_(false) {
	}

	virtual block_address get_nr_blocks() const {
		return sm_->get_nr_blocks();
	}

	virtual block_address get_nr_free() const {
		return sm_->get_nr_free();
	}

	// Throws runtime_error if called re-entrantly.
	virtual ref_t get_count(block_address b) const {
		cant_recurse("get_count");
		recursing_const_lock lock(*this);
		return sm_->get_count(b);
	}

	// Queued if called re-entrantly, otherwise forwarded.
	virtual void set_count(block_address b, ref_t c) {
		if (depth_)
			add_op(block_op(block_op::SET, b, c));
		else {
			recursing_lock lock(*this);
			sm_->set_count(b, c);
		}
	}

	// Throws runtime_error if called re-entrantly.
	virtual void commit() {
		cant_recurse("commit");
		sm_->commit();
	}

	// Queued if called re-entrantly, otherwise forwarded.
	virtual void inc(block_address b) {
		if (depth_)
			add_op(block_op(block_op::INC, b));
		else {
			recursing_lock lock(*this);
			sm_->inc(b);
		}
	}

	// Queued if called re-entrantly, otherwise forwarded.
	virtual void dec(block_address b) {
		if (depth_)
			add_op(block_op(block_op::DEC, b));
		else {
			recursing_lock lock(*this);
			sm_->dec(b);
		}
	}

	// new_block must not recurse.
	virtual boost::optional<block_address>
	new_block() {
		cant_recurse("new_block");
		recursing_lock lock(*this);
		return sm_->new_block();
	}

	virtual boost::optional<block_address>
	new_block(block_address begin, block_address end) {
		cant_recurse("new_block(range)");
		recursing_lock lock(*this);
		return sm_->new_block(begin, end);
	}

	// While a call is in progress the true answer may depend on queued
	// operations, so conservatively report true.
	virtual bool count_possibly_greater_than_one(block_address b) const {
		if (depth_)
			return true;
		else {
			recursing_const_lock lock(*this);
			return sm_->count_possibly_greater_than_one(b);
		}
	}

	// Throws runtime_error if called re-entrantly.
	virtual void extend(block_address extra_blocks) {
		cant_recurse("extend");
		recursing_lock lock(*this);
		return sm_->extend(extra_blocks);
	}

	virtual void iterate(iterator &it) const {
		sm_->iterate(it);
	}

	// Throws runtime_error if called re-entrantly.
	virtual size_t root_size() const {
		cant_recurse("root_size");
		recursing_const_lock lock(*this);
		return sm_->root_size();
	}

	// Throws runtime_error if called re-entrantly.
	virtual void copy_root(void *dest, size_t len) const {
		cant_recurse("copy_root");
		recursing_const_lock lock(*this);
		return sm_->copy_root(dest, len);
	}

	// Throws runtime_error if called re-entrantly.
	virtual void check(persistent_data::block_counter &counter) const {
		cant_recurse("check");
		recursing_const_lock lock(*this);
		return sm_->check(counter);
	}

	// The clone wraps a clone of the underlying space map; queued
	// operations are not copied.
	virtual checked_space_map::ptr clone() const {
		return checked_space_map::ptr(new sm_recursive(sm_->clone()));
	}

	// Applies every queued operation to the wrapped space map.
	//
	// Each operation is applied under its own recursing_lock so that
	// anything it triggers re-entrantly is queued rather than applied
	// immediately.  That lock's destructor calls flush_ops() again, so:
	//  - flushing_ turns those nested calls into no-ops; without it the
	//    nested call would restart from the first queued operation and
	//    recurse without bound;
	//  - each pass works on a batch swapped out of ops_, so operations
	//    queued during the pass never touch the container being
	//    iterated; they are picked up by the next pass.
	//
	// If applying an operation throws, the operations of the current
	// batch that have not yet been applied are discarded.
	void flush_ops() {
		if (flushing_)
			return;

		flush_guard guard(flushing_);

		while (!ops_.empty()) {
			op_map pending;
			pending.swap(ops_);

			op_map::const_iterator it, end = pending.end();
			for (it = pending.begin(); it != end; ++it) {
				list<block_op> const &ops = it->second;
				list<block_op>::const_iterator op_it, op_end = ops.end();

				for (op_it = ops.begin(); op_it != op_end; ++op_it) {
					recursing_lock lock(*this);

					switch (op_it->op_) {
					case block_op::INC:
						sm_->inc(op_it->b_);
						break;

					case block_op::DEC:
						sm_->dec(op_it->b_);
						break;

					case block_op::SET:
						sm_->set_count(op_it->b_, op_it->rc_);
						break;
					}
				}
			}
		}
	}

private:
	// Queues |op|, preserving arrival order per block.
	void add_op(block_op const &op) {
		ops_[op.b_].push_back(op);
	}

	// Throws runtime_error naming |method| if a call is in progress.
	void cant_recurse(string const &method) const {
		if (depth_)
			throw runtime_error("recursive '" + method + "' not supported");
	}

	// Marks a mutating call as in progress for the lifetime of the
	// object; when the outermost one ends the queued operations are
	// flushed.
	struct recursing_lock {
		recursing_lock(sm_recursive &smr)
			: smr_(smr) {
			smr_.depth_++;
		}

		~recursing_lock() {
			if (!--smr_.depth_)
				smr_.flush_ops();
		}

	private:
		sm_recursive &smr_;
	};

	// As recursing_lock, but for const methods: tracks depth only and
	// never flushes.
	struct recursing_const_lock {
		recursing_const_lock(sm_recursive const &smr)
			: smr_(smr) {
			smr_.depth_++;
		}

		~recursing_const_lock() {
			smr_.depth_--;
		}

	private:
		sm_recursive const &smr_;
	};

	// Sets a flag for the lifetime of the object and clears it on
	// destruction, including when an exception propagates.
	struct flush_guard {
		flush_guard(bool &flag)
			: flag_(flag) {
			flag_ = true;
		}

		~flush_guard() {
			flag_ = false;
		}

	private:
		bool &flag_;
	};

	checked_space_map::ptr sm_;
	mutable int depth_;	// number of calls into sm_ currently in progress
	bool flushing_;		// true while flush_ops() is draining ops_

	typedef map<block_address, list<block_op> > op_map;
	op_map ops_;		// deferred operations, keyed by block
};
}
//----------------------------------------------------------------
// Wraps |sm| in a space map that queues re-entrant inc/dec/set_count
// calls instead of forwarding them immediately.
checked_space_map::ptr
persistent_data::create_recursive_sm(checked_space_map::ptr sm)
{
	checked_space_map::ptr wrapped(new sm_recursive(sm));
	return wrapped;
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,32 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef SPACE_MAP_RECURSIVE_H
#define SPACE_MAP_RECURSIVE_H
#include "space_map.h"
//----------------------------------------------------------------
namespace persistent_data {
// Wraps |sm| so that inc, dec and set_count calls made while another call
// into the space map is in progress are queued and applied afterwards.
// Other re-entrant calls (e.g. new_block, commit) throw runtime_error.
checked_space_map::ptr create_recursive_sm(checked_space_map::ptr sm);
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,131 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "persistent-data/space_map_transactional.h"
//----------------------------------------------------------------
namespace {
// Space map wrapper that allocates from a snapshot of the space map
// taken at the last commit.
//
// Reference count reads and updates are forwarded to sm_.  new_block()
// searches committed_, a clone of sm_ made at construction and after
// each commit(), starting from search_start_.
class sm_transactional : public checked_space_map {
public:
	typedef shared_ptr<sm_transactional> ptr;

	sm_transactional(checked_space_map::ptr sm)
		: sm_(sm),
		  committed_(sm_->clone()),
		  allocated_(0),
		  search_start_(0) {
	}

	// Size of the committed snapshot.
	virtual block_address get_nr_blocks() const {
		return committed_->get_nr_blocks();
	}

	// Free blocks in the snapshot, less those handed out since.
	virtual block_address get_nr_free() const {
		block_address const free_at_commit = committed_->get_nr_free();
		return free_at_commit - allocated_;
	}

	virtual ref_t get_count(block_address b) const {
		return sm_->get_count(b);
	}

	virtual void set_count(block_address b, ref_t c) {
		sm_->set_count(b, c);
	}

	// Commits the wrapped space map, then re-snapshots it and resets
	// the allocation bookkeeping.
	virtual void commit() {
		sm_->commit();

		committed_ = sm_->clone();
		search_start_ = 0;
		allocated_ = 0;
	}

	virtual void inc(block_address b) {
		// FIXME: this may do an implicit allocation, so
		// search_start_ and allocated_ will be wrong.
		sm_->inc(b);
	}

	virtual void dec(block_address b) {
		sm_->dec(b);
	}

	// Allocates from anywhere in the wrapped space map's range.
	virtual maybe_block new_block() {
		block_address const limit = sm_->get_nr_blocks();
		return new_block(0, limit);
	}

	// Allocates a block in [begin, end) from the committed snapshot,
	// never looking below search_start_.  Returns an empty maybe_block
	// when nothing is available.
	virtual maybe_block new_block(block_address begin, block_address end) {
		if (search_start_ >= end)
			return maybe_block();

		block_address const from = max(search_start_, begin);
		maybe_block found = committed_->new_block(from, end);

		if (!found) {
			// Nothing left below |end|; skip this range next time.
			search_start_ = end;
			return found;
		}

		search_start_ = *found + 1;
		allocated_++;
		return found;
	}

	virtual bool count_possibly_greater_than_one(block_address b) const {
		return sm_->count_possibly_greater_than_one(b);
	}

	virtual void extend(block_address extra_blocks) {
		return sm_->extend(extra_blocks);
	}

	virtual void iterate(iterator &it) const {
		sm_->iterate(it);
	}

	virtual size_t root_size() const {
		return sm_->root_size();
	}

	virtual void copy_root(void *dest, size_t len) const {
		return sm_->copy_root(dest, len);
	}

	virtual void check(block_counter &counter) const {
		return sm_->check(counter);
	}

	// The new wrapper shares sm_ with this one and takes its own
	// snapshot of it.
	virtual checked_space_map::ptr clone() const {
		return checked_space_map::ptr(new sm_transactional(sm_));
	}

private:
	checked_space_map::ptr sm_;		// live space map
	checked_space_map::ptr committed_;	// snapshot used for allocation
	block_address allocated_;		// blocks handed out since the snapshot
	block_address search_start_;		// lowest block new_block() will consider
};
}
//----------------------------------------------------------------
// Wraps |sm| in a space map that allocates new blocks from a snapshot
// taken at the last commit.
checked_space_map::ptr
persistent_data::create_transactional_sm(checked_space_map::ptr sm)
{
	checked_space_map::ptr wrapped(new sm_transactional(sm));
	return wrapped;
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef SPACE_MAP_TRANSACTIONAL_H
#define SPACE_MAP_TRANSACTIONAL_H
#include "space_map.h"
//----------------------------------------------------------------
namespace persistent_data {
// FIXME: rename this; 'transactional' is too vague.
// This space map ensures that no block freed within the current
// transaction is allocated again before the next commit.
checked_space_map::ptr create_transactional_sm(checked_space_map::ptr sm);
}
//----------------------------------------------------------------
#endif

View File

@@ -0,0 +1,122 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "persistent-data/transaction_manager.h"
#include <string.h>
using namespace boost;
using namespace persistent_data;
using namespace std;
//----------------------------------------------------------------
// Stores the block manager and space map; the shadow table starts empty.
transaction_manager::transaction_manager(block_manager<>::ptr bm,
space_map::ptr sm)
: bm_(bm),
sm_(sm)
{
}
// Nothing to release explicitly; members clean up after themselves.
transaction_manager::~transaction_manager()
{
}
// Starts a transaction: takes the superblock lock on |superblock| and
// forgets which blocks were shadowed previously.  The shadow table is
// only wiped once the lock has been obtained.
transaction_manager::write_ref
transaction_manager::begin(block_address superblock, validator v)
{
	write_ref sb_ref = bm_->superblock(superblock, v);

	wipe_shadow_table();

	return sb_ref;
}
// Allocates a block from the space map and returns it zeroed and write
// locked.  The block is recorded as a shadow.
// Throws runtime_error if the space map has no free block.  If locking
// fails the sm_decrementer releases the allocation again.
transaction_manager::write_ref
transaction_manager::new_block(validator v)
{
	optional<block_address> allocated = sm_->new_block();
	if (!allocated)
		throw runtime_error("couldn't allocate new block");

	block_address b = *allocated;

	// Undo the allocation unless we reach dont_bother().
	sm_decrementer rollback(sm_, b);

	write_ref locked = bm_->write_lock_zero(b, v);
	add_shadow(b);

	rollback.dont_bother();
	return locked;
}
// Returns a writeable version of block |orig|.
//
// If |orig| has already been shadowed in this transaction and the space
// map says it cannot be shared, it is write locked in place and the
// boolean is false.  Otherwise its contents are copied into a newly
// allocated block, |orig| is decremented, the new block is recorded as a
// shadow, and the boolean says whether |orig| had a count greater than
// one (i.e. whether the caller needs to increment the children).
//
// Throws runtime_error if |orig| is free or if no block can be
// allocated.
pair<transaction_manager::write_ref, bool>
transaction_manager::shadow(block_address orig, validator v)
{
	if (is_shadow(orig) &&
	    !sm_->count_possibly_greater_than_one(orig))
		return make_pair(bm_->write_lock(orig, v), false);

	read_ref src = bm_->read_lock(orig, v);

	// Validate the source before allocating anything: checking
	// afterwards would leak the new block on failure, and a free |orig|
	// could itself be handed out by new_block().
	ref_t count = sm_->get_count(orig);
	if (count == 0)
		throw runtime_error("shadowing free block");

	optional<block_address> mb = sm_->new_block();
	if (!mb)
		throw runtime_error("couldn't allocate new block");

	// As in new_block(): release the allocation again if anything below
	// throws before the shadow is fully set up.
	sm_decrementer decrementer(sm_, *mb);

	write_ref dest = bm_->write_lock_zero(*mb, v);
	::memcpy(dest.data().raw(), src.data().raw(), MD_BLOCK_SIZE); // FIXME: use buffer copy method

	sm_->dec(orig);
	add_shadow(dest.get_location());

	decrementer.dont_bother();
	return make_pair(dest, count > 1);
}
// Read locks block |b| via the block manager, without a validator.
transaction_manager::read_ref
transaction_manager::read_lock(block_address b)
{
return bm_->read_lock(b);
}
// Read locks block |b| via the block manager, passing validator |v|.
transaction_manager::read_ref
transaction_manager::read_lock(block_address b, validator v)
{
return bm_->read_lock(b, v);
}
// Records |b| as shadowed; a no-op if it is already recorded.
void
transaction_manager::add_shadow(block_address b)
{
shadows_.insert(b);
}
// Forgets that |b| was shadowed; a no-op if it was not recorded.
void
transaction_manager::remove_shadow(block_address b)
{
shadows_.erase(b);
}
// True if |b| is currently recorded in the shadow table.
bool
transaction_manager::is_shadow(block_address b) const
{
	return shadows_.find(b) != shadows_.end();
}
// Empties the shadow table; called from begin().
void
transaction_manager::wipe_shadow_table()
{
shadows_.clear();
}
//----------------------------------------------------------------

View File

@@ -0,0 +1,84 @@
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef TRANSACTION_MANAGER_H
#define TRANSACTION_MANAGER_H
#include "block.h"
#include "space_map.h"
#include <set>
#include <boost/shared_ptr.hpp>
//----------------------------------------------------------------
namespace persistent_data {
// Hands out read and write references to metadata blocks on behalf of a
// block manager and a space map.  New blocks are allocated from the
// space map, existing blocks are shadowed (copied to a new block) before
// being written, and the set of blocks shadowed since the last begin()
// is remembered so they are not copied twice.
class transaction_manager : boost::noncopyable {
public:
typedef boost::shared_ptr<transaction_manager> ptr;
typedef block_manager<>::read_ref read_ref;
typedef block_manager<>::write_ref write_ref;
typedef block_manager<>::validator::ptr validator;
// If the space map is persistent, then the caller should
// hold onto a reference and remember to call sm_->commit()
// and update the superblock before dropping the superblock
// reference.
transaction_manager(block_manager<>::ptr bm,
space_map::ptr sm);
~transaction_manager();
// Locks the superblock and clears the shadow table.
// Drop the superblock reference to commit
write_ref begin(block_address superblock, validator v);
// Allocates a block from the space map and returns it zeroed and
// write locked.  Throws runtime_error if no block can be allocated.
write_ref new_block(validator v);
// shadowing returns a new write_ref, and a boolean which
// indicates whether the children should be incremented.
std::pair<write_ref, bool> shadow(block_address orig, validator v);
// Read lock a block, without or with a validator.
read_ref read_lock(block_address b);
read_ref read_lock(block_address b, validator v);
// Accessors for the space map and block manager in use.
space_map::ptr get_sm() {
return sm_;
}
// Replaces the space map used for subsequent operations.
void set_sm(space_map::ptr sm) {
sm_ = sm;
}
block_manager<>::ptr get_bm() {
return bm_;
}
private:
// Maintenance of the shadow table (shadows_).
void add_shadow(block_address b);
void remove_shadow(block_address b);
bool is_shadow(block_address b) const;
void wipe_shadow_table();
block_manager<>::ptr bm_;
space_map::ptr sm_;
// Blocks allocated or shadowed since the last begin().
std::set<block_address> shadows_;
};
}
//----------------------------------------------------------------
#endif