space map disk

This commit is contained in:
Joe Thornber 2011-07-22 16:09:56 +01:00
parent 186753d443
commit a683979585
25 changed files with 1389 additions and 221 deletions

4
.gitignore vendored
View File

@ -1,3 +1,5 @@
*~ *~
*.o *.o
multisnap_display *_t
*.d
test.data

View File

@ -1,24 +1,30 @@
SOURCE=\ SOURCE=\
metadata.cc endian.cc \
metadata.cc \
metadata_disk_structures.cc \
space_map_disk.cc \
PROGRAM_SOURCE=\ TEST_SOURCE=\
block_t.cc \ block_t.cc \
btree_t.cc \ btree_t.cc \
endian_t.cc \
metadata_t.cc \
space_map_t.cc \ space_map_t.cc \
transaction_manager_t.cc space_map_disk_t.cc \
transaction_manager_t.cc \
OBJECTS=$(subst .cc,.o,$(SOURCE)) OBJECTS=$(subst .cc,.o,$(SOURCE))
TEST_PROGRAMS=$(subst .cc,,$(TEST_SOURCE))
CPPFLAGS=-Wall -std=c++0x -g CPPFLAGS=-Wall -std=c++0x -g
INCLUDES= INCLUDES=
LIBS=-lstdc++ LIBS=-lstdc++
.PHONEY: unit-tests .PHONEY: unit-tests test-programs
unit-tests: block_t btree_t space_map_t transaction_manager_t test-programs: $(TEST_PROGRAMS)
./block_t
./btree_t unit-tests: $(TEST_PROGRAMS)
./space_map_t for p in $(TEST_PROGRAMS); do echo Running $$p; ./$$p; done
./transaction_manager_t
.SUFFIXES: .cc .o .d .SUFFIXES: .cc .o .d
@ -42,8 +48,17 @@ btree_t: btree_t.o
space_map_t: space_map_t.o space_map_t: space_map_t.o
g++ $(CPPFLAGS) -o $@ $+ $(LIBS) g++ $(CPPFLAGS) -o $@ $+ $(LIBS)
space_map_disk_t: space_map_disk_t.o $(OBJECTS)
g++ $(CPPFLAGS) -o $@ $+ $(LIBS)
transaction_manager_t: transaction_manager_t.o transaction_manager_t: transaction_manager_t.o
g++ $(CPPFLAGS) -o $@ $+ $(LIBS) g++ $(CPPFLAGS) -o $@ $+ $(LIBS)
metadata_t: metadata_t.o $(OBJECTS)
g++ $(CPPFLAGS) -o $@ $+ $(LIBS)
endian_t: endian_t.o $(OBJECTS)
g++ $(CPPFLAGS) -o $@ $+ $(LIBS)
include $(subst .cc,.d,$(SOURCE)) include $(subst .cc,.d,$(SOURCE))
include $(subst .cc,.d,$(PROGRAM_SOURCE)) include $(subst .cc,.d,$(TEST_SOURCE))

12
block.h
View File

@ -156,6 +156,8 @@ namespace persistent_data {
// held. // held.
void flush(); void flush();
block_address get_nr_blocks() const;
private: private:
void check(block_address b) const; void check(block_address b) const;
@ -165,11 +167,21 @@ namespace persistent_data {
void read_release(block *b) const; void read_release(block *b) const;
void write_release(block *b); void write_release(block *b);
// Kind of lock recorded against a block address in held_locks_.
enum lock_type {
// Passed to register_lock()/unregister_lock() by the read_lock() paths.
READ_LOCK,
// Passed by the write_lock(), write_lock_zero() and superblock() paths.
WRITE_LOCK
};
void register_lock(block_address b, lock_type t) const;
void unregister_lock(block_address b, lock_type t) const;
int fd_; int fd_;
block_address nr_blocks_; block_address nr_blocks_;
mutable unsigned lock_count_; mutable unsigned lock_count_;
mutable unsigned superblock_count_; mutable unsigned superblock_count_;
mutable unsigned ordinary_count_; mutable unsigned ordinary_count_;
mutable std::map<block_address, std::pair<lock_type, unsigned> > held_locks_;
}; };
} }

View File

@ -6,7 +6,6 @@
#include <unistd.h> #include <unistd.h>
#include <boost/bind.hpp> #include <boost/bind.hpp>
#include <iostream>
#include <stdexcept> #include <stdexcept>
using namespace boost; using namespace boost;
@ -76,10 +75,11 @@ block_manager<BlockSize>::read_lock(block_address location) const
buffer buf; buffer buf;
read_buffer(location, buf); read_buffer(location, buf);
register_lock(location, READ_LOCK);
return read_ref( return read_ref(
typename block::ptr( typename block::ptr(
new block(location, buf, lock_count_, ordinary_count_))); new block(location, buf, lock_count_, ordinary_count_),
bind(&block_manager::read_release, this, _1)));
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@ -97,6 +97,7 @@ block_manager<BlockSize>::write_lock(block_address location)
buffer buf; buffer buf;
read_buffer(location, buf); read_buffer(location, buf);
register_lock(location, WRITE_LOCK);
return write_ref( return write_ref(
typename block::ptr( typename block::ptr(
new block(location, buf, lock_count_, ordinary_count_), new block(location, buf, lock_count_, ordinary_count_),
@ -113,6 +114,7 @@ block_manager<BlockSize>::write_lock_zero(block_address location)
zero_buffer(buf); zero_buffer(buf);
typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_), typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_),
bind(&block_manager<BlockSize>::write_release, this, _1)); bind(&block_manager<BlockSize>::write_release, this, _1));
register_lock(location, WRITE_LOCK);
return write_ref(b); return write_ref(b);
} }
@ -125,7 +127,9 @@ block_manager<BlockSize>::read_lock(block_address location,
buffer buf; buffer buf;
read_buffer(location, buf); read_buffer(location, buf);
typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v)); typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v),
bind(&block_manager::read_release, this, _1));
register_lock(location, READ_LOCK);
return read_ref(b); return read_ref(b);
} }
@ -148,6 +152,7 @@ block_manager<BlockSize>::write_lock(block_address location,
read_buffer(location, buf); read_buffer(location, buf);
typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v), typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v),
bind(&block_manager::write_release, this, _1)); bind(&block_manager::write_release, this, _1));
register_lock(location, WRITE_LOCK);
return write_ref(b); return write_ref(b);
} }
@ -162,6 +167,7 @@ block_manager<BlockSize>::write_lock_zero(block_address location,
zero_buffer(buf); zero_buffer(buf);
typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v), typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v),
bind(&block_manager::write_release, this, _1)); bind(&block_manager::write_release, this, _1));
register_lock(location, WRITE_LOCK);
return write_ref(b); return write_ref(b);
} }
@ -178,6 +184,7 @@ block_manager<BlockSize>::superblock(block_address location)
read_buffer(location, buf); read_buffer(location, buf);
typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true), typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true),
bind(&block_manager::write_release, this, _1)); bind(&block_manager::write_release, this, _1));
register_lock(location, WRITE_LOCK);
return write_ref(b); return write_ref(b);
} }
@ -194,6 +201,7 @@ block_manager<BlockSize>::superblock_zero(block_address location)
zero_buffer(buf); zero_buffer(buf);
typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true), typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true),
bind(&block_manager::write_release, this, _1)); bind(&block_manager::write_release, this, _1));
register_lock(location, WRITE_LOCK);
return write_ref(b); return write_ref(b);
} }
@ -211,6 +219,7 @@ block_manager<BlockSize>::superblock(block_address location,
read_buffer(location, buf); read_buffer(location, buf);
typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true, v), typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true, v),
bind(&block_manager::write_release, this, _1)); bind(&block_manager::write_release, this, _1));
register_lock(location, WRITE_LOCK);
return write_ref(b); return write_ref(b);
} }
@ -228,6 +237,7 @@ block_manager<BlockSize>::superblock_zero(block_address location,
zero_buffer(buf); zero_buffer(buf);
typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true, v), typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true, v),
bind(&block_manager::write_release, this, _1)); bind(&block_manager::write_release, this, _1));
register_lock(location, WRITE_LOCK);
return write_ref(b); return write_ref(b);
} }
@ -292,7 +302,7 @@ template <uint32_t BlockSize>
void void
block_manager<BlockSize>::zero_buffer(block_manager<BlockSize>::buffer &buffer) const block_manager<BlockSize>::zero_buffer(block_manager<BlockSize>::buffer &buffer) const
{ {
memset(buffer, 0, BlockSize); ::memset(buffer, 0, BlockSize);
} }
// FIXME: we don't need this anymore // FIXME: we don't need this anymore
@ -300,6 +310,7 @@ template <uint32_t BlockSize>
void void
block_manager<BlockSize>::read_release(block *b) const block_manager<BlockSize>::read_release(block *b) const
{ {
unregister_lock(b->location_, READ_LOCK);
delete b; delete b;
} }
@ -316,6 +327,7 @@ block_manager<BlockSize>::write_release(block *b)
(*b->validator_)->prepare(*b); (*b->validator_)->prepare(*b);
write_buffer(b->location_, b->data_); write_buffer(b->location_, b->data_);
unregister_lock(b->location_, WRITE_LOCK);
delete b; delete b;
} }
@ -327,4 +339,46 @@ block_manager<BlockSize>::check(block_address b) const
throw std::runtime_error("block address out of bounds"); throw std::runtime_error("block address out of bounds");
} }
// Returns the stored block count (nr_blocks_) for this block manager.
template <uint32_t BlockSize>
block_address
block_manager<BlockSize>::get_nr_blocks() const
{
return nr_blocks_;
}
// FIXME: how do we unregister if block construction throws?
//
// Records that a lock of type |t| has been taken on block |b|.
//
// The first lock on a block creates an entry with a count of one.  A
// further lock on an already locked block is only accepted when both the
// held lock and the new lock are READ_LOCKs, in which case the count is
// bumped.  Anything else throws std::runtime_error.
template <uint32_t BlockSize>
void
block_manager<BlockSize>::register_lock(block_address b, lock_type t) const
{
	auto entry = held_locks_.find(b);

	// Nothing held on this block yet: start a fresh entry.
	if (entry == held_locks_.end()) {
		held_locks_.insert(make_pair(b, make_pair(t, 1)));
		return;
	}

	std::pair<lock_type, unsigned> &held = entry->second;

	if (held.first != t)
		throw std::runtime_error("lock type mismatch when locking");

	// Same type as the held lock; write locks are exclusive.
	if (held.first == WRITE_LOCK)
		throw std::runtime_error("cannot hold concurrent write locks");

	++held.second;
}
// Drops one lock of type |t| on block |b|.
//
// Throws std::runtime_error if no lock is recorded for |b|, or if the
// recorded lock type differs from |t|.  The entry is removed from
// held_locks_ once its count reaches zero.
template <uint32_t BlockSize>
void
block_manager<BlockSize>::unregister_lock(block_address b, lock_type t) const
{
	auto entry = held_locks_.find(b);

	if (entry == held_locks_.end())
		throw std::runtime_error("lock not held");

	std::pair<lock_type, unsigned> &held = entry->second;

	if (held.first != t)
		throw std::runtime_error("lock type mismatch when unlocking");

	// Forget the block entirely when the last holder lets go.
	if (--held.second == 0)
		held_locks_.erase(entry);
}
//---------------------------------------------------------------- //----------------------------------------------------------------

View File

@ -167,6 +167,32 @@ BOOST_AUTO_TEST_CASE(flush_throws_if_held_locks)
BOOST_CHECK_THROW(bm->flush(), runtime_error); BOOST_CHECK_THROW(bm->flush(), runtime_error);
} }
// cannot write lock the same block more than once BOOST_AUTO_TEST_CASE(no_concurrent_write_locks)
{
auto bm = create_bm();
auto wr = bm->write_lock(0);
BOOST_CHECK_THROW(bm->write_lock(0), runtime_error);
}
// Taking a second read lock on block 0 while the first one (rr) is still
// in scope must not throw.
BOOST_AUTO_TEST_CASE(concurrent_read_locks)
{
auto bm = create_bm();
auto rr = bm->read_lock(0);
bm->read_lock(0);
}
// A read lock followed by a write lock on the same block must not throw.
// NOTE(review): neither returned reference is stored, so presumably the
// read lock is released before the write lock is taken (sequential, not
// concurrent, locking) -- confirm this is the intent.
BOOST_AUTO_TEST_CASE(read_then_write)
{
auto bm = create_bm();
bm->read_lock(0);
bm->write_lock(0);
}
// A write lock followed by a read lock on the same block must not throw.
// NOTE(review): neither returned reference is stored, so presumably the
// write lock is released before the read lock is taken (sequential, not
// concurrent, locking) -- confirm this is the intent.
BOOST_AUTO_TEST_CASE(write_then_read)
{
auto bm = create_bm();
bm->write_lock(0);
bm->read_lock(0);
}
//---------------------------------------------------------------- //----------------------------------------------------------------

33
btree.h
View File

@ -12,9 +12,17 @@
namespace persistent_data { namespace persistent_data {
// Reference counter that ignores every request.  Both inc() and dec()
// accept a value and do nothing with it.
template <typename ValueType>
class NoOpRefCounter {
public:
	// Deliberately empty.
	void inc(ValueType const &) {
	}

	// Deliberately empty.
	void dec(ValueType const &) {
	}
};
struct uint64_traits { struct uint64_traits {
typedef base::__le64 disk_type; typedef base::__le64 disk_type;
typedef uint64_t value_type; typedef uint64_t value_type;
typedef NoOpRefCounter<uint64_t> ref_counter;
static void unpack(disk_type const &disk, value_type &value) { static void unpack(disk_type const &disk, value_type &value) {
value = base::to_cpu<uint64_t>(disk); value = base::to_cpu<uint64_t>(disk);
@ -72,6 +80,8 @@ namespace persistent_data {
unsigned get_max_entries() const; unsigned get_max_entries() const;
void set_max_entries(unsigned n); void set_max_entries(unsigned n);
// FIXME: remove this, and get the constructor to do it.
void set_max_entries(); // calculates the max for you. void set_max_entries(); // calculates the max for you.
uint64_t key_at(unsigned i) const; uint64_t key_at(unsigned i) const;
@ -102,6 +112,14 @@ namespace persistent_data {
optional<unsigned> exact_search(uint64_t key) const; optional<unsigned> exact_search(uint64_t key) const;
int lower_bound(uint64_t key) const; int lower_bound(uint64_t key) const;
template <typename RefCounter>
void inc_children(RefCounter &rc);
// FIXME: remove
void *raw() {
return raw_;
}
private: private:
static unsigned calc_max_entries(void); static unsigned calc_max_entries(void);
@ -206,6 +224,10 @@ namespace persistent_data {
return to_node<uint64_traits, BlockSize>(spine_.front()); return to_node<uint64_traits, BlockSize>(spine_.front());
} }
// Returns the location of the block at the front of spine_.
block_address get_parent_location() const {
return spine_.front().get_location();
}
block_address get_root() const { block_address get_root() const {
return root_; return root_;
} }
@ -216,6 +238,7 @@ namespace persistent_data {
block_address root_; block_address root_;
}; };
// FIXME: make a member of btree
template <typename ValueTraits, uint32_t BlockSize> template <typename ValueTraits, uint32_t BlockSize>
optional<typename ValueTraits::value_type> optional<typename ValueTraits::value_type>
lookup_raw(ro_spine<BlockSize> &spine, block_address block, uint64_t key) { lookup_raw(ro_spine<BlockSize> &spine, block_address block, uint64_t key) {
@ -252,9 +275,13 @@ namespace persistent_data {
typedef typename block_manager<BlockSize>::read_ref read_ref; typedef typename block_manager<BlockSize>::read_ref read_ref;
typedef typename block_manager<BlockSize>::write_ref write_ref; typedef typename block_manager<BlockSize>::write_ref write_ref;
btree(typename persistent_data::transaction_manager<BlockSize>::ptr tm); btree(typename persistent_data::transaction_manager<BlockSize>::ptr tm,
typename ValueTraits::ref_counter rc);
btree(typename transaction_manager<BlockSize>::ptr tm, btree(typename transaction_manager<BlockSize>::ptr tm,
block_address root); block_address root,
typename ValueTraits::ref_counter rc);
~btree(); ~btree();
maybe_value lookup(key const &key) const; maybe_value lookup(key const &key) const;
@ -297,6 +324,8 @@ namespace persistent_data {
typename persistent_data::transaction_manager<BlockSize>::ptr tm_; typename persistent_data::transaction_manager<BlockSize>::ptr tm_;
bool destroy_; bool destroy_;
block_address root_; block_address root_;
NoOpRefCounter<uint64_t> internal_rc_;
typename ValueTraits::ref_counter rc_;
}; };
}; };

View File

@ -155,8 +155,8 @@ node_ref<ValueTraits, BlockSize>::copy_entries(node_ref const &rhs,
throw runtime_error("too many entries"); throw runtime_error("too many entries");
set_nr_entries(n + count); set_nr_entries(n + count);
::memcpy(rhs.key_ptr(begin), key_ptr(n), sizeof(uint64_t) * count); ::memcpy(key_ptr(n), rhs.key_ptr(begin), sizeof(uint64_t) * count);
::memcpy(rhs.value_ptr(begin), value_ptr(n), sizeof(typename ValueTraits::disk_type) * count); ::memcpy(value_ptr(n), rhs.value_ptr(begin), sizeof(typename ValueTraits::disk_type) * count);
} }
template <typename ValueTraits, uint32_t BlockSize> template <typename ValueTraits, uint32_t BlockSize>
@ -228,12 +228,30 @@ node_ref<ValueTraits, BlockSize>::value_ptr(unsigned i) const
sizeof(typename ValueTraits::disk_type) * i; sizeof(typename ValueTraits::disk_type) * i;
} }
// Calls rc.inc() once for every value held in this node.
//
// Each on-disk value is copied out of the node with memcpy, unpacked via
// ValueTraits::unpack, and the unpacked value is handed to |rc|.
template <typename ValueTraits, uint32_t BlockSize>
template <typename RefCounter>
void
node_ref<ValueTraits, BlockSize>::inc_children(RefCounter &rc)
{
	typedef typename ValueTraits::value_type value_type;
	typedef typename ValueTraits::disk_type disk_type;

	unsigned const count = get_nr_entries();
	for (unsigned idx = 0; idx != count; ++idx) {
		value_type unpacked;
		disk_type packed;

		// Copy rather than dereference the pointer into the node.
		::memcpy(&packed, value_ptr(idx), sizeof(packed));
		ValueTraits::unpack(packed, unpacked);
		rc.inc(unpacked);
	}
}
//---------------------------------------------------------------- //----------------------------------------------------------------
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize> template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
btree<Levels, ValueTraits, BlockSize>::btree(typename transaction_manager<BlockSize>::ptr tm) btree<Levels, ValueTraits, BlockSize>::
btree(typename transaction_manager<BlockSize>::ptr tm,
typename ValueTraits::ref_counter rc)
: tm_(tm), : tm_(tm),
destroy_(false) destroy_(false),
rc_(rc)
{ {
using namespace btree_detail; using namespace btree_detail;
@ -248,11 +266,14 @@ btree<Levels, ValueTraits, BlockSize>::btree(typename transaction_manager<BlockS
} }
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize> template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
btree<Levels, ValueTraits, BlockSize>::btree(typename transaction_manager<BlockSize>::ptr tm, btree<Levels, ValueTraits, BlockSize>::
block_address root) btree(typename transaction_manager<BlockSize>::ptr tm,
block_address root,
typename ValueTraits::ref_counter rc)
: tm_(tm), : tm_(tm),
destroy_(false), destroy_(false),
root_(root) root_(root),
rc_(rc)
{ {
} }
@ -318,7 +339,7 @@ insert(key const &key,
auto n = spine.template get_node<uint64_traits>(); auto n = spine.template get_node<uint64_traits>();
if (need_insert) { if (need_insert) {
btree<Levels - 1, ValueTraits, BlockSize> new_tree(tm_); btree<Levels - 1, ValueTraits, BlockSize> new_tree(tm_, rc_);
n.insert_at(index, key[level], new_tree.get_root()); n.insert_at(index, key[level], new_tree.get_root());
} }
@ -335,20 +356,18 @@ insert(key const &key,
n.set_value(index, value); n.set_value(index, value);
} }
#if 0
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize> template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
void void
btree<Levels, ValueTraits, BlockSize>::remove(key const &key) btree<Levels, ValueTraits, BlockSize>::remove(key const &key)
{ {
using namespace btree_detail; using namespace btree_detail;
} }
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize> template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
block_address block_address
btree<Levels, ValueTraits, BlockSize>::get_root() const btree<Levels, ValueTraits, BlockSize>::get_root() const
{ {
return root_;
} }
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize> template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
@ -356,25 +375,40 @@ void
btree<Levels, ValueTraits, BlockSize>::set_root(block_address root) btree<Levels, ValueTraits, BlockSize>::set_root(block_address root)
{ {
using namespace btree_detail; using namespace btree_detail;
root_ = root;
} }
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize> template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
block_address typename btree<Levels, ValueTraits, BlockSize>::ptr
btree<Levels, ValueTraits, BlockSize>::get_root() const
{
using namespace btree_detail;
}
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
ptr
btree<Levels, ValueTraits, BlockSize>::clone() const btree<Levels, ValueTraits, BlockSize>::clone() const
{ {
using namespace btree_detail; using namespace btree_detail;
ro_spine<BlockSize> spine(tm_);
spine.step(root_);
auto new_root = tm_->new_block();
auto o = spine.template get_node<uint64_traits>();
if (o.get_type() == INTERNAL) {
auto n = to_node<uint64_traits, BlockSize>(new_root);
::memcpy(n.raw(), o.raw(), BlockSize);
typename uint64_traits::ref_counter rc(internal_rc_);
n.inc_children(rc);
} else {
auto n = to_node<ValueTraits, BlockSize>(new_root);
::memcpy(n.raw(), o.raw(), BlockSize);
typename ValueTraits::ref_counter rc(rc_);
n.inc_children(rc);
}
return btree<Levels, ValueTraits, BlockSize>::ptr(
new btree<Levels, ValueTraits, BlockSize>(
tm_, new_root.get_location(), rc_));
} }
#if 0
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize> template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
void void
btree<Levels, ValueTraits, BlockSize>::destroy() btree<Levels, ValueTraits, BlockSize>::destroy()
@ -414,7 +448,6 @@ split_beneath(btree_detail::shadow_spine<BlockSize> &spine,
node_type type; node_type type;
unsigned nr_left, nr_right; unsigned nr_left, nr_right;
auto left = tm_->new_block(); auto left = tm_->new_block();
auto l = to_node<ValueTraits, BlockSize>(left); auto l = to_node<ValueTraits, BlockSize>(left);
l.set_nr_entries(0); l.set_nr_entries(0);
@ -457,9 +490,10 @@ split_beneath(btree_detail::shadow_spine<BlockSize> &spine,
template <unsigned Levels, typename _, uint32_t BlockSize> template <unsigned Levels, typename _, uint32_t BlockSize>
template <typename ValueTraits> template <typename ValueTraits>
void void
btree<Levels, _, BlockSize>::split_sibling(btree_detail::shadow_spine<BlockSize> &spine, btree<Levels, _, BlockSize>::
block_address parent_index, split_sibling(btree_detail::shadow_spine<BlockSize> &spine,
uint64_t key) block_address parent_index,
uint64_t key)
{ {
using namespace btree_detail; using namespace btree_detail;

View File

@ -24,9 +24,11 @@ namespace {
btree<1, uint64_traits, 4096>::ptr btree<1, uint64_traits, 4096>::ptr
create_btree() { create_btree() {
typename uint64_traits::ref_counter rc;
return btree<1, uint64_traits, 4096>::ptr( return btree<1, uint64_traits, 4096>::ptr(
new btree<1, uint64_traits, 4096>( new btree<1, uint64_traits, 4096>(
create_tm())); create_tm(), rc));
} }
} }

46
endian.cc Normal file
View File

@ -0,0 +1,46 @@
#include "endian.h"
using namespace base;
//----------------------------------------------------------------
bool
base::test_bit_le(void const *bits, unsigned b)
{
__le64 const *w = reinterpret_cast<__le64 const *>(bits);
w += b / 64;
uint64_t v = to_cpu<uint64_t>(*w);
uint64_t mask = 1;
mask = mask << (b % 64);
return (v & mask) ? true : false;
}
void
base::set_bit_le(void *bits, unsigned b)
{
__le64 *w = reinterpret_cast<__le64 *>(bits);
w += b / 64;
uint64_t v = to_cpu<uint64_t>(*w);
uint64_t mask = 1;
mask = mask << (b % 64);
v |= mask;
*w = to_disk<__le64>(v);
}
void
base::clear_bit_le(void *bits, unsigned b)
{
__le64 *w = reinterpret_cast<__le64 *>(bits);
w += b / 64;
uint64_t v = to_cpu<uint64_t>(*w);
uint64_t mask = 1;
mask = mask << (b % 64);
mask = ~mask;
v &= mask;
*w = to_disk<__le64>(v);
}
//----------------------------------------------------------------

View File

@ -1,16 +1,17 @@
#ifndef ENDIAN_H #ifndef ENDIAN_H
#define ENDIAN_H #define ENDIAN_H
#include <stdint.h>
#include <boost/static_assert.hpp> #include <boost/static_assert.hpp>
//---------------------------------------------------------------- //----------------------------------------------------------------
// FIXME: rename to endian
namespace base { namespace base {
// These are just little wrapper types to make the compiler // These are just little wrapper types to make the compiler
// understand that the le types are not assignable to the // understand that the le types are not assignable to the
// corresponding cpu type. // corresponding cpu type.
struct __le16 { struct __le16 {
explicit __le16(uint16_t v = 0) explicit __le16(uint16_t v = 0)
: v_(v) { : v_(v) {
@ -37,6 +38,7 @@ namespace base {
//-------------------------------- //--------------------------------
// FIXME: actually do the conversions !
template <typename CPUType, typename DiskType> template <typename CPUType, typename DiskType>
CPUType to_cpu(DiskType const &d) { CPUType to_cpu(DiskType const &d) {
BOOST_STATIC_ASSERT(sizeof(d) == 0); BOOST_STATIC_ASSERT(sizeof(d) == 0);
@ -76,6 +78,12 @@ namespace base {
inline __le64 to_disk<__le64, uint64_t>(uint64_t const &v) { inline __le64 to_disk<__le64, uint64_t>(uint64_t const &v) {
return __le64(v); return __le64(v);
} }
//--------------------------------
bool test_bit_le(void const *bits, unsigned b);
void set_bit_le(void *bits, unsigned b);
void clear_bit_le(void *bits, unsigned b);
} }
//---------------------------------------------------------------- //----------------------------------------------------------------

62
endian_t.cc Normal file
View File

@ -0,0 +1,62 @@
#include "space_map_disk.h"
#include "core_map.h"
#define BOOST_TEST_MODULE EndianTests
#include <boost/test/included/unit_test.hpp>
using namespace base;
using namespace boost;
using namespace persistent_data;
using namespace std;
//----------------------------------------------------------------
// Exercises test_bit_le/set_bit_le/clear_bit_le over a bitmap whose size
// is not a multiple of 64 bits.
BOOST_AUTO_TEST_CASE(bitmaps)
{
	unsigned const nr_bits = 10247;
	vector<uint64_t> words((nr_bits + 63) / 64, 0);
	void *bits = &words[0];

	// a zero-filled bitmap has nothing set
	for (unsigned b = 0; b != nr_bits; ++b)
		BOOST_CHECK(!test_bit_le(bits, b));

	// turn every bit on ...
	for (unsigned b = 0; b != nr_bits; ++b)
		set_bit_le(bits, b);

	// ... and make sure they all read back as set
	for (unsigned b = 0; b != nr_bits; ++b)
		BOOST_CHECK(test_bit_le(bits, b));

	// knock out every third bit
	for (unsigned b = 0; b < nr_bits; b += 3)
		clear_bit_le(bits, b);

	// only the multiples of three should now be clear
	for (unsigned b = 0; b != nr_bits; ++b) {
		bool const expect_set = (b % 3) != 0;
		BOOST_CHECK(test_bit_le(bits, b) == expect_set);
	}
}
// Setting every bit of the first 64-bit word must leave the second word
// untouched.
BOOST_AUTO_TEST_CASE(bitmaps_alternate_words)
{
	unsigned const nr_bits = 10247;
	vector<uint64_t> words((nr_bits + 63) / 64, 0);
	void *bits = &words[0];

	// the first two words start out clear
	for (unsigned b = 0; b != 128; ++b)
		BOOST_CHECK(!test_bit_le(bits, b));

	// fill word 0
	for (unsigned b = 0; b != 64; ++b)
		set_bit_le(bits, b);

	// word 1 must still be clear
	for (unsigned b = 64; b != 128; ++b)
		BOOST_CHECK(!test_bit_le(bits, b));
}
//----------------------------------------------------------------

View File

@ -1,9 +1,12 @@
#include "block.h" #include "block.h"
#include "metadata.h"
#include <iostream> #include <iostream>
using namespace thinp;
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
std::cout << "Hello, world!" << std::endl; metadata m("foo.metadata", 128, 1024);
return 0; return 0;
} }

16
math.h Normal file
View File

@ -0,0 +1,16 @@
#ifndef THINP_MATH_H
#define THINP_MATH_H
//----------------------------------------------------------------
namespace base {
// Divides |v| by |divisor|, rounding the quotient up to the next whole
// number when the division is not exact.
//
// Only works for integral types, and |divisor| must be positive.
//
// The result is built from the quotient and the remainder instead of
// computing v + (divisor - 1), so it cannot wrap around when |v| is close
// to the maximum value of T.
template <typename T>
T div_up(T const &v, T const &divisor) {
	T const quotient = v / divisor;

	// A positive remainder means the truncated quotient fell short.
	return (v % divisor) > T(0) ? quotient + 1 : quotient;
}
}
//----------------------------------------------------------------
#endif

View File

@ -3,96 +3,106 @@
#include <stdexcept> #include <stdexcept>
using namespace persistent_data; using namespace persistent_data;
using namespace multisnap; using namespace thin_provisioning;
//---------------------------------------------------------------- //----------------------------------------------------------------
namespace { namespace {
typedef uint8_t __le8;
typedef uint8_t __u8;
typedef uint32_t __le32;
typedef uint64_t __le64;
uint32_t const SUPERBLOCK_MAGIC = 27022010; uint32_t const SUPERBLOCK_MAGIC = 27022010;
block_address const SUPERBLOCK_LOCATION = 0; block_address const SUPERBLOCK_LOCATION = 0;
uint32_t const VERSION = 1; uint32_t const VERSION = 1;
unsigned const METADATA_CACHE_SIZE = 1024; unsigned const METADATA_CACHE_SIZE = 1024;
unsigned const SECTOR_TO_BLOCK_SHIFT = 3; unsigned const SECTOR_TO_BLOCK_SHIFT = 3;
unsigned const SPACE_MAP_ROOT_SIZE = 128;
struct multisnap_super_block {
__le32 csum_;
__le32 flags_;
__le64 blocknr_; /* this block number, dm_block_t */
__u8 uuid_[16];
__le64 magic_;
__le32 version_;
__le32 time_;
__le64 trans_id_;
/* root for userspace's transaction (for migration and friends) */
__le64 held_root_;
__u8 data_space_map_root_[SPACE_MAP_ROOT_SIZE];
__u8 metadata_space_map_root_[SPACE_MAP_ROOT_SIZE];
/* 2 level btree mapping (dev_id, (dev block, time)) -> data block */
__le64 data_mapping_root_;
/* device detail root mapping dev_id -> device_details */
__le64 device_details_root_;
__le32 data_block_size_; /* in 512-byte sectors */
__le32 metadata_block_size_; /* in 512-byte sectors */
__le64 metadata_nr_blocks_;
__le32 compat_flags_;
__le32 incompat_flags_;
} __attribute__ ((packed));
struct device_details {
__le64 dev_size_;
__le64 mapped_blocks_;
__le64 transaction_id_; /* when created */
__le32 creation_time_;
__le32 snapshotted_time_;
} __attribute__ ((packed));
} }
//---------------------------------------------------------------- //----------------------------------------------------------------
metadata::thin::maybe_address thin::thin(thin_dev_t dev, metadata *metadata)
metadata::thin::lookup(block_address thin_block) : dev_(dev),
metadata_(metadata)
{
}
// Returns the device id (dev_) stored when this thin object was constructed.
thin_dev_t
thin::get_dev_t() const
{
return dev_;
}
thin::maybe_address
thin::lookup(block_address thin_block)
{ {
uint64_t key[2] = {dev_, thin_block}; uint64_t key[2] = {dev_, thin_block};
return metadata_->mappings_.lookup(key); return metadata_->mappings_.lookup(key);
} }
void void
metadata::thin::insert(block_address thin_block, block_address data_block) thin::insert(block_address thin_block, block_address data_block)
{ {
uint64_t key[2] = {dev_, thin_block}; uint64_t key[2] = {dev_, thin_block};
return metadata_->mappings_.insert(key, data_block); return metadata_->mappings_.insert(key, data_block);
} }
void void
metadata::thin::remove(block_address thin_block) thin::remove(block_address thin_block)
{ {
uint64_t key[2] = {dev_, thin_block}; uint64_t key[2] = {dev_, thin_block};
metadata_->mappings_.remove(key); metadata_->mappings_.remove(key);
} }
#if 0
// Stores |time| as the snapshotted time in this device's details record.
//
// Looks the record up in metadata_->details_ under dev_, updates the
// field and re-inserts the record.  Throws std::runtime_error if the
// details tree has no entry for this device.
void
thin::set_snapshot_time(uint32_t time)
{
	uint64_t key[1] = { dev_ };
	auto mdetail = metadata_->details_.lookup(key);
	if (!mdetail)
		// qualified, matching the other throws in this file
		throw std::runtime_error("no such device");

	mdetail->snapshotted_time_ = time;
	metadata_->details_.insert(key, *mdetail);
}
// Returns the mapped_blocks_ field of this device's details record.
//
// Throws std::runtime_error if metadata_->details_ has no entry for dev_.
block_address
thin::get_mapped_blocks() const
{
	uint64_t key[1] = { dev_ };
	auto mdetail = metadata_->details_.lookup(key);
	if (!mdetail)
		// qualified, matching the other throws in this file
		throw std::runtime_error("no such device");

	return mdetail->mapped_blocks_;
}
// Stores |count| as the mapped block count in this device's details record.
//
// Looks the record up in metadata_->details_ under dev_, updates the
// field and re-inserts the record.  Throws std::runtime_error if the
// details tree has no entry for this device.
void
thin::set_mapped_blocks(block_address count)
{
	uint64_t key[1] = { dev_ };
	auto mdetail = metadata_->details_.lookup(key);
	if (!mdetail)
		// qualified, matching the other throws in this file
		throw std::runtime_error("no such device");

	mdetail->mapped_blocks_ = count;
	metadata_->details_.insert(key, *mdetail);
}
//-------------------------------- //--------------------------------
metadata::metadata(std::string const &metadata_dev, metadata::metadata(transaction_manager<MD_BLOCK_SIZE>::ptr tm,
block_address superblock,
sector_t data_block_size, sector_t data_block_size,
block_address nr_data_blocks) block_address nr_data_blocks,
bool create)
: superblock_(superblock),
tm_(tm),
details_(tm, typename device_details_traits::ref_counter()),
mappings_top_level_(tm, mtree_ref_counter<MD_BLOCK_SIZE>(tm)),
mappings_(tm, space_map_ref_counter(data_sm_))
{ {
::memset(&sb_, 0, sizeof(sb_));
sb_.data_mapping_root_ = mappings_.get_root();
sb_.device_details_root_ = details_.get_root();
sb_.metadata_block_size_ = MD_BLOCK_SIZE;
sb_.metadata_nr_blocks_ = tm->get_bm()->get_nr_blocks();
} }
metadata::~metadata() metadata::~metadata()
@ -103,24 +113,29 @@ metadata::~metadata()
void void
metadata::commit() metadata::commit()
{ {
sb_.data_mapping_root_ = mappings_.get_root();
sb_.device_details_root_ = details_.get_root();
auto superblock = tm_->get_bm()->superblock(superblock_);
auto disk = reinterpret_cast<superblock_disk *>(superblock.data());
superblock_traits::pack(sb_, *disk);
} }
#endif
void void
metadata::create_thin(dev_t dev) metadata::create_thin(thin_dev_t dev)
{ {
uint64_t key[1] = {dev}; uint64_t key[1] = {dev};
if (device_exists(dev)) if (device_exists(dev))
throw std::runtime_error("Device already exists"); throw std::runtime_error("Device already exists");
single_mapping_tree::ptr new_tree(new single_mapping_tree(tm_)); single_mapping_tree::ptr new_tree(new single_mapping_tree(tm_, space_map_ref_counter(data_sm_)));
mappings_top_level_.insert(key, new_tree->get_root()); mappings_top_level_.insert(key, new_tree->get_root());
mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly
} }
void void
metadata::create_snap(dev_t dev, dev_t origin) metadata::create_snap(thin_dev_t dev, thin_dev_t origin)
{ {
uint64_t snap_key[1] = {dev}; uint64_t snap_key[1] = {dev};
uint64_t origin_key[1] = {origin}; uint64_t origin_key[1] = {origin};
@ -129,89 +144,95 @@ metadata::create_snap(dev_t dev, dev_t origin)
if (!mtree_root) if (!mtree_root)
throw std::runtime_error("unknown origin"); throw std::runtime_error("unknown origin");
single_mapping_tree otree(tm_, *mtree_root); single_mapping_tree otree(tm_, *mtree_root,
space_map_ref_counter(data_sm_));
single_mapping_tree::ptr clone(otree.clone()); single_mapping_tree::ptr clone(otree.clone());
mappings_top_level_.insert(snap_key, clone->get_root()); mappings_top_level_.insert(snap_key, clone->get_root());
mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly
time_++; sb_.time_++;
auto o = open(origin); auto o = open_thin(origin);
auto s = open(dev); auto s = open_thin(dev);
o->set_snapshot_time(time_); o->set_snapshot_time(sb_.time_);
s->set_snapshot_time(time_); s->set_snapshot_time(sb_.time_);
s->set_mapped_blocks(o->get_mapped_blocks()); s->set_mapped_blocks(o->get_mapped_blocks());
} }
void void
metadata::del(dev_t dev) metadata::del(thin_dev_t dev)
{ {
uint64_t key[1] = {dev}; uint64_t key[1] = {dev};
mappings_top_level_.remove(key); mappings_top_level_.remove(key);
} }
#if 0
void void
metadata::set_transaction_id(uint64_t id) metadata::set_transaction_id(uint64_t id)
{ {
sb_.trans_id_ = id;
} }
uint64_t uint64_t
metadata::get_transaction_id() const metadata::get_transaction_id() const
{ {
return sb_.trans_id_;
} }
block_address block_address
metadata::get_held_root() const metadata::get_held_root() const
{ {
return sb_.held_root_;
}
thin_ptr
metadata::open_device(dev_t)
{
} }
block_address block_address
metadata::alloc_data_block() metadata::alloc_data_block()
{ {
return data_sm_->new_block();
} }
void void
metadata::free_data_block(block_address b) metadata::free_data_block(block_address b)
{ {
data_sm_->dec(b);
} }
block_address block_address
metadata::get_nr_free_data_blocks() const metadata::get_nr_free_data_blocks() const
{ {
return data_sm_->get_nr_free();
} }
sector_t sector_t
metadata::get_data_block_size() const metadata::get_data_block_size() const
{ {
return sb_.data_block_size_;
} }
block_address block_address
metadata::get_data_dev_size() const metadata::get_data_dev_size() const
{ {
return data_sm_->get_nr_blocks();
} }
#endif
bool thin::ptr
metadata::device_exists(dev_t dev) const metadata::open_thin(thin_dev_t dev)
{ {
uint64_t key[1] = {dev}; uint64_t key[1] = {dev};
auto mval = details_.lookup(key); auto mdetails = details_.lookup(key);
return mval; if (!mdetails)
throw runtime_error("no such device");
thin *ptr = new thin(dev, this);
thin::ptr r(ptr);
return r;
}
// Reports whether the device-details tree holds an entry for `dev`.
bool
metadata::device_exists(thin_dev_t dev) const
{
	uint64_t key[1] = {dev};

	// lookup() returns an optional-like value (other callers test it with
	// `!`).  Convert explicitly: an implicit optional -> bool conversion in
	// a return statement is rejected when the optional's bool conversion
	// operator is declared explicit.
	return static_cast<bool>(details_.lookup(key));
}
//---------------------------------------------------------------- //----------------------------------------------------------------

View File

@ -4,6 +4,8 @@
#include "block.h" #include "block.h"
#include "transaction_manager.h" #include "transaction_manager.h"
#include "btree.h" #include "btree.h"
#include "endian.h"
#include "metadata_disk_structures.h"
#include <string> #include <string>
@ -11,76 +13,124 @@
//---------------------------------------------------------------- //----------------------------------------------------------------
// FIXME: make a const namespace thin_provisioning {
#define BLOCK_SIZE 4096 unsigned const MD_BLOCK_SIZE = 4096;
// FIXME: don't use namespaces in a header
using namespace base;
using namespace persistent_data;
namespace multisnap {
typedef uint64_t sector_t; typedef uint64_t sector_t;
typedef uint32_t thin_dev_t;
struct device_details_disk { //------------------------------------------------
__le64 dev_size;
__le64 mapped_blocks;
__le64 transaction_id; /* when created */
__le32 creation_time;
__le32 snapshotted_time;
} __attribute__ ((packed));
struct device_details { class space_map_ref_counter {
uint64_t dev_size;
uint64_t mapped_blocks;
uint64_t transaction_id; /* when created */
uint32_t creation_time;
uint32_t snapshotted_time;
};
struct detail_traits {
typedef device_details_disk disk_type;
typedef device_details value_type;
static value_type construct(void *data) {
struct device_details_disk disk;
struct device_details cpu;
::memcpy(&disk, data, sizeof(disk));
cpu.dev_size = to_cpu<uint64_t>(disk.dev_size);
cpu.mapped_blocks = to_cpu<uint64_t>(disk.mapped_blocks);
cpu.transaction_id = to_cpu<uint64_t>(disk.transaction_id);
cpu.creation_time = to_cpu<uint32_t>(disk.creation_time);
cpu.snapshotted_time = to_cpu<uint32_t>(disk.snapshotted_time);
return cpu;
}
};
#if 0
class dev_traits {
public: public:
space_map_ref_counter(space_map::ptr sm)
: sm_(sm) {
}
void inc(block_address b) {
sm_->inc(b);
}
void dec(block_address b) {
sm_->dec(b);
}
private:
space_map::ptr sm_;
};
struct block_traits {
typedef base::__le64 disk_type; typedef base::__le64 disk_type;
typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> value_type; typedef uint64_t value_type;
typedef space_map_ref_counter ref_counter;
static value_type construct(void *data) { static void unpack(disk_type const &disk, value_type &value) {
uint64_t root = uint64_traits::construct(data); value = base::to_cpu<uint64_t>(disk);
}
return value_type static void pack(value_type const &value, disk_type &disk) {
disk = base::to_disk<base::__le64>(value);
} }
}; };
#endif
//------------------------------------------------
template <uint32_t BlockSize>
class mtree_ref_counter {
public:
mtree_ref_counter(typename transaction_manager<BlockSize>::ptr tm)
: tm_(tm) {
}
void inc(block_address b) {
}
void dec(block_address b) {
}
private:
typename transaction_manager<BlockSize>::ptr tm_;
};
template <uint32_t BlockSize>
struct mtree_traits {
typedef base::__le64 disk_type;
typedef uint64_t value_type;
typedef mtree_ref_counter<BlockSize> ref_counter;
static void unpack(disk_type const &disk, value_type &value) {
value = base::to_cpu<uint64_t>(disk);
}
static void pack(value_type const &value, disk_type &disk) {
disk = base::to_disk<base::__le64>(value);
}
};
class metadata;
// Handle for a single thin device.  The constructor is private and
// `metadata` is a friend, so instances are only created by `metadata`
// (see metadata::open_thin()).
class thin {
public:
typedef boost::shared_ptr<thin> ptr;
// Optional data block address, as returned by lookup().
typedef boost::optional<block_address> maybe_address;
// Accessor for the device id.
thin_dev_t get_dev_t() const;
// Mapping operations keyed by a block address within the thin device.
// NOTE(review): presumably lookup() yields an empty optional for an
// unmapped block - confirm against the implementation.
maybe_address lookup(block_address thin_block);
void insert(block_address thin_block, block_address data_block);
void remove(block_address thin_block);
// Per-device bookkeeping; create_snap() sets these on both origin and snapshot.
void set_snapshot_time(uint32_t time);
block_address get_mapped_blocks() const;
void set_mapped_blocks(block_address count);
private:
friend class metadata;
thin(thin_dev_t dev, metadata *metadata);
thin_dev_t dev_;
// Raw, non-owning pointer back to the metadata object that created this handle.
metadata *metadata_;
};
class metadata { class metadata {
public: public:
typedef boost::shared_ptr<metadata> ptr; typedef boost::shared_ptr<metadata> ptr;
typedef persistent_data::block_address block_address;
metadata(std::string const &metadata_dev, metadata(transaction_manager<MD_BLOCK_SIZE>::ptr tm,
block_address superblock,
sector_t data_block_size, sector_t data_block_size,
persistent_data::block_address nr_data_blocks); block_address nr_data_blocks,
bool create);
~metadata(); ~metadata();
void commit(); void commit();
typedef uint32_t dev_t; void create_thin(thin_dev_t dev);
void create_thin(dev_t dev); void create_snap(thin_dev_t dev, thin_dev_t origin);
void create_snap(dev_t dev, dev_t origin); void del(thin_dev_t);
void del(dev_t);
void set_transaction_id(uint64_t id); void set_transaction_id(uint64_t id);
uint64_t get_transaction_id() const; uint64_t get_transaction_id() const;
@ -95,46 +145,29 @@ namespace multisnap {
sector_t get_data_block_size() const; sector_t get_data_block_size() const;
block_address get_data_dev_size() const; block_address get_data_dev_size() const;
class thin { thin::ptr open_thin(thin_dev_t);
public:
typedef boost::shared_ptr<thin> ptr;
dev_t get_dev_t() const;
typedef boost::optional<block_address> maybe_address;
maybe_address lookup(block_address thin_block);
void insert(block_address thin_block, block_address data_block);
void remove(block_address thin_block);
void set_snapshot_time(uint32_t time);
persistent_data::block_address get_mapped_blocks() const;
void set_mapped_blocks(persistent_data::block_address count);
private:
dev_t dev_;
metadata::ptr metadata_;
};
thin::ptr open(dev_t);
private: private:
friend class thin; friend class thin;
bool device_exists(dev_t dev) const; bool device_exists(thin_dev_t dev) const;
uint32_t time_; block_address superblock_;
persistent_data::transaction_manager<BLOCK_SIZE>::ptr tm_; typedef persistent_data::transaction_manager<MD_BLOCK_SIZE>::ptr tm_ptr;
typedef persistent_data::btree<1, detail_traits, BLOCK_SIZE> detail_tree; typedef persistent_data::btree<1, device_details_traits, MD_BLOCK_SIZE> detail_tree;
typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> dev_tree; typedef persistent_data::btree<1, mtree_traits<MD_BLOCK_SIZE>, MD_BLOCK_SIZE> dev_tree;
typedef persistent_data::btree<2, uint64_traits, BLOCK_SIZE> mapping_tree; typedef persistent_data::btree<2, block_traits, MD_BLOCK_SIZE> mapping_tree;
typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> single_mapping_tree; typedef persistent_data::btree<1, block_traits, MD_BLOCK_SIZE> single_mapping_tree;
tm_ptr tm_;
space_map::ptr metadata_sm_;
space_map::ptr data_sm_;
detail_tree details_; detail_tree details_;
dev_tree mappings_top_level_; dev_tree mappings_top_level_;
mapping_tree mappings_; mapping_tree mappings_;
superblock sb_;
}; };
}; };

View File

@ -0,0 +1,95 @@
#include "metadata_disk_structures.h"
#include <string.h>
using namespace thin_provisioning;
//----------------------------------------------------------------
// Converts an on-disk device_details record into its in-core form.
void
device_details_traits::unpack(device_details_disk const &disk, device_details &value)
{
	// 64-bit fields
	value.dev_size_       = base::to_cpu<uint64_t>(disk.dev_size_);
	value.mapped_blocks_  = base::to_cpu<uint64_t>(disk.mapped_blocks_);
	value.transaction_id_ = base::to_cpu<uint64_t>(disk.transaction_id_);

	// 32-bit fields
	value.creation_time_    = base::to_cpu<uint32_t>(disk.creation_time_);
	value.snapshotted_time_ = base::to_cpu<uint32_t>(disk.snapshotted_time_);
}
// Converts an in-core device_details record into its on-disk form.
void
device_details_traits::pack(device_details const &value, device_details_disk &disk)
{
	// 64-bit fields
	disk.dev_size_       = base::to_disk<base::__le64>(value.dev_size_);
	disk.mapped_blocks_  = base::to_disk<base::__le64>(value.mapped_blocks_);
	disk.transaction_id_ = base::to_disk<base::__le64>(value.transaction_id_);

	// 32-bit fields
	disk.creation_time_    = base::to_disk<base::__le32>(value.creation_time_);
	disk.snapshotted_time_ = base::to_disk<base::__le32>(value.snapshotted_time_);
}
// Converts an on-disk superblock into its in-core form.  Scalar fields go
// through to_cpu<>(); the uuid and the two space map roots are opaque byte
// arrays and are copied verbatim.
void
superblock_traits::unpack(superblock_disk const &disk, superblock &value)
{
	value.csum_ = to_cpu<uint32_t>(disk.csum_);
	// Fixed: flags_ used to be filled from disk.csum_.
	value.flags_ = to_cpu<uint32_t>(disk.flags_);
	value.blocknr_ = to_cpu<uint64_t>(disk.blocknr_);

	::memcpy(value.uuid_, disk.uuid_, sizeof(value.uuid_));
	value.magic_ = to_cpu<uint64_t>(disk.magic_);
	value.version_ = to_cpu<uint32_t>(disk.version_);
	value.time_ = to_cpu<uint32_t>(disk.time_);

	value.trans_id_ = to_cpu<uint64_t>(disk.trans_id_);
	value.held_root_ = to_cpu<uint64_t>(disk.held_root_);

	::memcpy(value.data_space_map_root_,
		 disk.data_space_map_root_,
		 sizeof(value.data_space_map_root_));
	::memcpy(value.metadata_space_map_root_,
		 disk.metadata_space_map_root_,
		 sizeof(value.metadata_space_map_root_));

	value.data_mapping_root_ = to_cpu<uint64_t>(disk.data_mapping_root_);
	value.device_details_root_ = to_cpu<uint64_t>(disk.device_details_root_);
	value.data_block_size_ = to_cpu<uint32_t>(disk.data_block_size_);

	value.metadata_block_size_ = to_cpu<uint32_t>(disk.metadata_block_size_);
	value.metadata_nr_blocks_ = to_cpu<uint64_t>(disk.metadata_nr_blocks_);

	value.compat_flags_ = to_cpu<uint32_t>(disk.compat_flags_);
	value.incompat_flags_ = to_cpu<uint32_t>(disk.incompat_flags_);
}
// Converts an in-core superblock into its on-disk form.  Scalar fields go
// through to_disk<>(); the uuid and the two space map roots are opaque byte
// arrays and are copied verbatim.
void
superblock_traits::pack(superblock const &value, superblock_disk &disk)
{
	disk.csum_ = to_disk<__le32>(value.csum_);
	// Fixed: flags_ used to be written from value.csum_.
	disk.flags_ = to_disk<__le32>(value.flags_);
	disk.blocknr_ = to_disk<__le64>(value.blocknr_);

	::memcpy(disk.uuid_, value.uuid_, sizeof(disk.uuid_));
	disk.magic_ = to_disk<__le64>(value.magic_);
	disk.version_ = to_disk<__le32>(value.version_);
	disk.time_ = to_disk<__le32>(value.time_);

	disk.trans_id_ = to_disk<__le64>(value.trans_id_);
	disk.held_root_ = to_disk<__le64>(value.held_root_);

	::memcpy(disk.data_space_map_root_,
		 value.data_space_map_root_,
		 sizeof(disk.data_space_map_root_));
	::memcpy(disk.metadata_space_map_root_,
		 value.metadata_space_map_root_,
		 sizeof(disk.metadata_space_map_root_));

	disk.data_mapping_root_ = to_disk<__le64>(value.data_mapping_root_);
	disk.device_details_root_ = to_disk<__le64>(value.device_details_root_);
	disk.data_block_size_ = to_disk<__le32>(value.data_block_size_);

	disk.metadata_block_size_ = to_disk<__le32>(value.metadata_block_size_);
	disk.metadata_nr_blocks_ = to_disk<__le64>(value.metadata_nr_blocks_);

	disk.compat_flags_ = to_disk<__le32>(value.compat_flags_);
	disk.incompat_flags_ = to_disk<__le32>(value.incompat_flags_);
}
//----------------------------------------------------------------

117
metadata_disk_structures.h Normal file
View File

@ -0,0 +1,117 @@
#ifndef METADATA_DISK_STRUCTURES_H
#define METADATA_DISK_STRUCTURES_H
#include "endian.h"
#include "btree.h"
//----------------------------------------------------------------
namespace thin_provisioning {
	using namespace base; // FIXME: don't use namespaces in headers.

	// On-disk (packed, little-endian) per-device record.
	struct device_details_disk {
		__le64 dev_size_;
		__le64 mapped_blocks_;
		__le64 transaction_id_;  /* when created */
		__le32 creation_time_;
		__le32 snapshotted_time_;
	} __attribute__ ((packed));

	// In-core counterpart of device_details_disk.
	struct device_details {
		uint64_t dev_size_;
		uint64_t mapped_blocks_;
		uint64_t transaction_id_;  /* when created */
		uint32_t creation_time_;
		uint32_t snapshotted_time_;
	};

	// Value traits converting between the two device_details forms.
	struct device_details_traits {
		typedef device_details_disk disk_type;
		typedef device_details value_type;
		typedef persistent_data::NoOpRefCounter<device_details> ref_counter;

		static void unpack(device_details_disk const &disk, device_details &value);
		static void pack(device_details const &value, device_details_disk &disk);
	};

	// Bytes reserved in the superblock for each space map root.
	unsigned const SPACE_MAP_ROOT_SIZE = 128;

	typedef unsigned char __u8;

	// On-disk (packed, little-endian) superblock.
	struct superblock_disk {
		__le32 csum_;
		__le32 flags_;
		__le64 blocknr_;

		__u8 uuid_[16];
		__le64 magic_;
		__le32 version_;
		__le32 time_;

		__le64 trans_id_;
		/* root for userspace's transaction (for migration and friends) */
		__le64 held_root_;

		__u8 data_space_map_root_[SPACE_MAP_ROOT_SIZE];
		__u8 metadata_space_map_root_[SPACE_MAP_ROOT_SIZE];

		/* 2 level btree mapping (dev_id, (dev block, time)) -> data block */
		__le64 data_mapping_root_;

		/* device detail root mapping dev_id -> device_details */
		__le64 device_details_root_;

		__le32 data_block_size_; /* in 512-byte sectors */

		__le32 metadata_block_size_; /* in 512-byte sectors */
		__le64 metadata_nr_blocks_;

		__le32 compat_flags_;
		__le32 incompat_flags_;
	} __attribute__ ((packed));

	// In-core counterpart of superblock_disk; fields mirror it one to one.
	struct superblock {
		uint32_t csum_;
		uint32_t flags_;
		uint64_t blocknr_;

		unsigned char uuid_[16];
		uint64_t magic_;
		uint32_t version_;
		uint32_t time_;

		uint64_t trans_id_;
		/* root for userspace's transaction (for migration and friends) */
		uint64_t held_root_;

		unsigned char data_space_map_root_[SPACE_MAP_ROOT_SIZE];
		unsigned char metadata_space_map_root_[SPACE_MAP_ROOT_SIZE];

		/* 2 level btree mapping (dev_id, (dev block, time)) -> data block */
		uint64_t data_mapping_root_;

		/* device detail root mapping dev_id -> device_details */
		uint64_t device_details_root_;

		uint32_t data_block_size_; /* in 512-byte sectors */

		uint32_t metadata_block_size_; /* in 512-byte sectors */
		uint64_t metadata_nr_blocks_;

		uint32_t compat_flags_;
		uint32_t incompat_flags_;
	};

	// Value traits converting between the two superblock forms.
	struct superblock_traits {
		typedef superblock_disk disk_type;
		typedef superblock value_type;
		// Qualified like device_details_traits above: NoOpRefCounter is
		// not a member of this namespace.
		typedef persistent_data::NoOpRefCounter<superblock> ref_counter;

		static void unpack(superblock_disk const &disk, superblock &value);
		static void pack(superblock const &value, superblock_disk &disk);
	};
}
//----------------------------------------------------------------
#endif

41
metadata_t.cc Normal file
View File

@ -0,0 +1,41 @@
#include "metadata.h"
#include "core_map.h"
#define BOOST_TEST_MODULE MetadataTests
#include <boost/test/included/unit_test.hpp>
using namespace std;
using namespace boost;
using namespace persistent_data;
using namespace thin_provisioning;
//----------------------------------------------------------------
namespace {
	block_address const NR_BLOCKS = 1024;
	block_address const SUPERBLOCK = 0;

	// Builds a transaction manager over ./test.data, with an in-core
	// space map tracking the same number of blocks.  The block size is
	// MD_BLOCK_SIZE so it always matches what `metadata` expects.
	transaction_manager<MD_BLOCK_SIZE>::ptr
	create_tm() {
		block_manager<MD_BLOCK_SIZE>::ptr bm(
			new block_manager<MD_BLOCK_SIZE>("./test.data", NR_BLOCKS));
		space_map::ptr sm(new core_map(NR_BLOCKS));
		transaction_manager<MD_BLOCK_SIZE>::ptr tm(
			new transaction_manager<MD_BLOCK_SIZE>(bm, sm));
		return tm;
	}

	// Creates a brand new metadata object (create == true) whose
	// superblock lives at SUPERBLOCK.
	metadata::ptr
	create_metadata() {
		auto tm = create_tm();
		return metadata::ptr(
			new metadata(tm, SUPERBLOCK, 128, 1024000, true));
	}
}
//----------------------------------------------------------------
// Smoke test: constructing a metadata object must not throw.
BOOST_AUTO_TEST_CASE(create_metadata_object)
{
	metadata::ptr md = create_metadata();
}
//----------------------------------------------------------------

View File

@ -29,8 +29,10 @@ namespace persistent_data {
virtual bool count_possibly_greater_than_one(block_address b) const = 0; virtual bool count_possibly_greater_than_one(block_address b) const = 0;
}; };
class persistent_space_map { class persistent_space_map : public space_map {
public: public:
typedef boost::shared_ptr<persistent_space_map> ptr;
virtual size_t root_size() = 0; virtual size_t root_size() = 0;
virtual void copy_root(void *dest, size_t len) = 0; virtual void copy_root(void *dest, size_t len) = 0;
}; };

7
space_map_disk.cc Normal file
View File

@ -0,0 +1,7 @@
#include "space_map_disk.h"
//----------------------------------------------------------------
//----------------------------------------------------------------

338
space_map_disk.h Normal file
View File

@ -0,0 +1,338 @@
#ifndef SPACE_MAP_DISK_H
#define SPACE_MAP_DISK_H
#include "space_map.h"
#include "transaction_manager.h"
#include "endian.h"
#include "space_map_disk_structures.h"
#include "math.h"
//----------------------------------------------------------------
namespace persistent_data {
namespace sm_disk_detail {
using namespace base;
using namespace persistent_data;
// Accessor for one bitmap block of the space map.  Each entry is a 2-bit
// value stored after a bitmap_header: bit (2 * b) holds the high bit and
// bit (2 * b + 1) the low bit.  The object carries a private copy of the
// index_entry describing the block; after a mutation the caller must fetch
// it with get_ie() and store it back.
template <uint32_t BlockSize>
class bitmap {
public:
	bitmap(typename transaction_manager<BlockSize>::ptr tm,
	       index_entry const &ie)
		: tm_(tm),
		  ie_(ie) {
	}

	// Returns the 2-bit value (0..3) stored for entry b.
	ref_t lookup(unsigned b) const {
		auto rr = tm_->read_lock(ie_.blocknr_);
		void const *bits = bitmap_data(rr);

		// Plain decimal constants: the `0b` binary literal prefix is
		// not part of the C++0x dialect this project is built with.
		ref_t const high = test_bit_le(bits, b * 2) ? 2 : 0;
		ref_t const low = test_bit_le(bits, b * 2 + 1) ? 1 : 0;
		return high | low;
	}

	// Stores the 2-bit value n for entry b and keeps the index entry's
	// block number, free count and search hint up to date.
	// Throws std::runtime_error if n does not fit in two bits.
	void insert(unsigned b, ref_t n) {
		if (n > 3)
			throw std::runtime_error("bitmap can only hold 2 bit values");

		auto wr = tm_->shadow(ie_.blocknr_).first;
		void *bits = bitmap_data(wr);
		bool was_free = !test_bit_le(bits, b * 2) && !test_bit_le(bits, b * 2 + 1);

		if (n == 1 || n == 3)
			set_bit_le(bits, b * 2 + 1);
		else
			clear_bit_le(bits, b * 2 + 1);

		if (n == 2 || n == 3)
			set_bit_le(bits, b * 2);
		else
			clear_bit_le(bits, b * 2);

		// Record where the written copy of the block now lives.
		ie_.blocknr_ = wr.get_location();

		if (was_free && n > 0) {
			ie_.nr_free_--;
			if (b == ie_.none_free_before_)
				ie_.none_free_before_++;
		}

		if (!was_free && n == 0) {
			ie_.nr_free_++;
			if (b < ie_.none_free_before_)
				ie_.none_free_before_ = b;
		}
	}

	// Finds the first entry in [none_free_before_, end) whose value is
	// 0, sets it to 1 and returns its index.
	// Throws std::runtime_error if there is none.
	unsigned find_free(unsigned end) {
		for (unsigned i = ie_.none_free_before_; i < end; i++) {
			if (lookup(i) == 0) {
				insert(i, 1);
				return i;
			}
		}

		throw std::runtime_error("no free entry in bitmap");
	}

	// The (possibly updated) index entry for this bitmap block.
	index_entry const &get_ie() const {
		return ie_;
	}

private:
	// The entry bits start immediately after the header.
	void *bitmap_data(typename transaction_manager<BlockSize>::write_ref &wr) {
		bitmap_header *h = reinterpret_cast<bitmap_header *>(&wr.data()[0]);
		return h + 1;
	}

	void const *bitmap_data(typename transaction_manager<BlockSize>::read_ref &rr) const {
		bitmap_header const *h = reinterpret_cast<bitmap_header const *>(&rr.data()[0]);
		return h + 1;
	}

	typename transaction_manager<BlockSize>::ptr tm_;
	index_entry ie_;
};
struct ref_count_traits {
typedef __le32 disk_type;
typedef uint32_t value_type;
typedef NoOpRefCounter<uint32_t> ref_counter;
static void unpack(disk_type const &d, value_type &v) {
v = to_cpu<value_type>(d);
}
static void pack(value_type const &v, disk_type &d) {
d = to_disk<disk_type>(v);
}
};
template <uint32_t BlockSize>
class sm_disk : public persistent_space_map {
public:
typedef boost::shared_ptr<sm_disk<BlockSize> > ptr;
sm_disk(typename transaction_manager<BlockSize>::ptr tm,
block_address nr_blocks)
: tm_(tm),
entries_per_block_((BlockSize - sizeof(bitmap_header)) * 4),
nr_blocks_(0),
nr_allocated_(0),
bitmaps_(tm_, typename sm_disk_detail::index_entry_traits::ref_counter()),
ref_counts_(tm_, ref_count_traits::ref_counter()) {
extend(nr_blocks);
}
sm_disk(typename transaction_manager<BlockSize>::ptr tm,
sm_root const &root)
: tm_(tm),
nr_blocks_(root.nr_blocks_),
nr_allocated_(root.nr_allocated_),
bitmaps_(tm_, root.bitmap_root_, typename sm_disk::index_entry_traits::ref_counter()),
ref_counts_(tm_, root.ref_count_root_, typename ref_count_traits::ref_counter()) {
}
block_address get_nr_blocks() const {
return nr_blocks_;
}
block_address get_nr_free() const {
return nr_blocks_ - nr_allocated_;
}
ref_t get_count(block_address b) const {
auto count = lookup_bitmap(b);
if (count == 3)
return lookup_ref_count(b);
return count;
}
void set_count(block_address b, ref_t c) {
ref_t old = get_count(b);
if (c == old)
return;
if (c > 2) {
if (old < 3)
insert_bitmap(b, 3);
insert_ref_count(b, c);
} else {
if (old > 2)
remove_ref_count(b);
insert_bitmap(b, c);
}
if (old == 0)
nr_allocated_++;
else if (c == 0)
nr_allocated_--;
}
void commit() {
}
void inc(block_address b) {
// FIXME: 2 get_counts
ref_t old = get_count(b);
set_count(b, old + 1);
}
void dec(block_address b) {
ref_t old = get_count(b);
set_count(b, old - 1);
}
block_address new_block() {
// silly to always start searching from the
// beginning.
block_address nr_indexes = div_up<block_address>(nr_blocks_, entries_per_block_);
for (block_address index = 0; index < nr_indexes; index++) {
uint64_t key[1] = {index};
auto mie = bitmaps_.lookup(key);
if (!mie)
throw runtime_error("bitmap entry missing from btree");
bitmap<BlockSize> bm(tm_, *mie);
block_address b = bm.find_free((index == nr_indexes - 1) ?
nr_blocks_ % entries_per_block_ : entries_per_block_);
bitmaps_.insert(key, bm.get_ie());
nr_allocated_++;
b = (index * entries_per_block_) + b;
assert(get_count(b) == 1);
return b;
}
throw runtime_error("out of space");
}
bool count_possibly_greater_than_one(block_address b) const {
return get_count(b) > 1;
}
size_t root_size() {
return sizeof(sm_root_disk);
}
void copy_root(void *dest, size_t len) {
sm_root_disk d;
sm_root v;
if (len < sizeof(d))
throw runtime_error("root too small");
v.nr_blocks_ = nr_blocks_;
v.nr_allocated_ = nr_allocated_;
v.bitmap_root_ = bitmaps_.get_root();
v.ref_count_root_ = ref_counts_.get_root();
sm_root_traits::pack(v, d);
::memcpy(dest, &d, sizeof(d));
}
private:
void extend(block_address extra_blocks) {
block_address nr_blocks = nr_blocks_ + extra_blocks;
block_address bitmap_count = div_up<block_address>(nr_blocks, entries_per_block_);
block_address old_bitmap_count = div_up<block_address>(nr_blocks_, entries_per_block_);
for (block_address i = old_bitmap_count; i < bitmap_count; i++) {
auto wr = tm_->new_block();
struct index_entry ie;
ie.blocknr_ = wr.get_location();
ie.nr_free_ = i == (bitmap_count - 1) ?
(nr_blocks % entries_per_block_) : entries_per_block_;
ie.none_free_before_ = 0;
uint64_t key[1] = {i};
bitmaps_.insert(key, ie);
}
nr_blocks_ = nr_blocks;
}
ref_t lookup_bitmap(block_address b) const {
uint64_t key[1] = {b / entries_per_block_};
auto mindex = bitmaps_.lookup(key);
if (!mindex)
throw runtime_error("Couldn't lookup bitmap");
bitmap<BlockSize> bm(tm_, *mindex);
return bm.lookup(b % entries_per_block_);
}
void insert_bitmap(block_address b, unsigned n) {
if (n > 3)
throw runtime_error("bitmap can only hold 2 bit values");
uint64_t key[1] = {b / entries_per_block_};
auto mindex = bitmaps_.lookup(key);
if (!mindex)
throw runtime_error("Couldn't lookup bitmap");
bitmap<BlockSize> bm(tm_, *mindex);
bm.insert(b % entries_per_block_, n);
bitmaps_.insert(key, bm.get_ie());
}
ref_t lookup_ref_count(block_address b) const {
uint64_t key[1] = {b};
auto mvalue = ref_counts_.lookup(key);
if (!mvalue)
throw runtime_error("ref count not in tree");
return *mvalue;
}
void insert_ref_count(block_address b, ref_t count) {
uint64_t key[1] = {b};
ref_counts_.insert(key, count);
}
void remove_ref_count(block_address b) {
uint64_t key[1] = {b};
ref_counts_.remove(key);
}
typename transaction_manager<BlockSize>::ptr tm_;
uint32_t entries_per_block_;
block_address nr_blocks_;
block_address nr_allocated_;
btree<1, index_entry_traits, BlockSize> bitmaps_;
btree<1, ref_count_traits, BlockSize> ref_counts_;
};
}
// Factory: builds a new on-disk space map covering nr_blocks blocks and
// returns it through the persistent_space_map interface.
template <uint32_t MetadataBlockSize>
persistent_space_map::ptr
create_disk_sm(typename transaction_manager<MetadataBlockSize>::ptr tm,
	       block_address nr_blocks)
{
	typedef sm_disk_detail::sm_disk<MetadataBlockSize> sm_type;

	persistent_space_map::ptr sm(new sm_type(tm, nr_blocks));
	return sm;
}
// Factory: opens an existing on-disk space map.  `root` must point at
// at least sizeof(sm_root_disk) bytes holding a packed root.
template <uint32_t MetadataBlockSize>
persistent_space_map::ptr
open_disk_sm(typename transaction_manager<MetadataBlockSize>::ptr tm,
	     void *root)
{
	typedef sm_disk_detail::sm_disk<MetadataBlockSize> sm_type;

	// Copy out of the caller's buffer before unpacking.
	sm_disk_detail::sm_root_disk packed;
	::memcpy(&packed, root, sizeof(packed));

	sm_disk_detail::sm_root unpacked;
	sm_disk_detail::sm_root_traits::unpack(packed, unpacked);

	persistent_space_map::ptr sm(new sm_type(tm, unpacked));
	return sm;
}
}
//----------------------------------------------------------------
#endif

View File

@ -0,0 +1,98 @@
#ifndef SPACE_MAP_DISK_STRUCTURES_H
#define SPACE_MAP_DISK_STRUCTURES_H
#include "endian.h"
#include "btree.h"
//----------------------------------------------------------------
namespace persistent_data {
using namespace base;
namespace sm_disk_detail {
struct index_entry_disk {
__le64 blocknr_;
__le32 nr_free_;
__le32 none_free_before_;
} __attribute__ ((packed));
struct index_entry {
uint64_t blocknr_;
uint32_t nr_free_;
uint32_t none_free_before_;
};
struct index_entry_traits {
typedef index_entry_disk disk_type;
typedef index_entry value_type;
typedef NoOpRefCounter<index_entry> ref_counter;
static void unpack(disk_type const &disk, value_type &value) {
value.blocknr_ = to_cpu<uint64_t>(disk.blocknr_);
value.nr_free_ = to_cpu<uint32_t>(disk.nr_free_);
value.none_free_before_ = to_cpu<uint32_t>(disk.none_free_before_);
}
static void pack(value_type const &value, disk_type &disk) {
disk.blocknr_ = to_disk<__le64>(value.blocknr_);
disk.nr_free_ = to_disk<__le32>(value.nr_free_);
disk.none_free_before_ = to_disk<__le32>(value.none_free_before_);
}
};
unsigned const MAX_METADATA_BITMAPS = 255;
unsigned const ENTRIES_PER_BYTE = 4;
struct metadata_index {
__le32 csum_;
__le32 padding_;
__le64 blocknr_;
struct index_entry index[MAX_METADATA_BITMAPS];
} __attribute__ ((packed));
struct sm_root_disk {
__le64 nr_blocks_;
__le64 nr_allocated_;
__le64 bitmap_root_;
__le64 ref_count_root_;
} __attribute__ ((packed));
struct sm_root {
uint64_t nr_blocks_;
uint64_t nr_allocated_;
uint64_t bitmap_root_;
uint64_t ref_count_root_;
};
struct sm_root_traits {
typedef sm_root_disk disk_type;
typedef sm_root value_type;
typedef NoOpRefCounter<sm_root> ref_counter;
static void unpack(disk_type const &disk, value_type &value) {
value.nr_blocks_ = to_cpu<uint64_t>(disk.nr_blocks_);
value.nr_allocated_ = to_cpu<uint64_t>(disk.nr_allocated_);
value.bitmap_root_ = to_cpu<uint64_t>(disk.bitmap_root_);
value.ref_count_root_ = to_cpu<uint64_t>(disk.ref_count_root_);
}
static void pack(value_type const &value, disk_type &disk) {
disk.nr_blocks_ = to_disk<__le64>(value.nr_blocks_);
disk.nr_allocated_ = to_disk<__le64>(value.nr_allocated_);
disk.bitmap_root_ = to_disk<__le64>(value.bitmap_root_);
disk.ref_count_root_ = to_disk<__le64>(value.ref_count_root_);
}
};
struct bitmap_header {
__le32 csum;
__le32 not_used;
__le64 blocknr;
} __attribute__ ((packed));
}
}
//----------------------------------------------------------------
#endif

108
space_map_disk_t.cc Normal file
View File

@ -0,0 +1,108 @@
#include "space_map_disk.h"
#include "core_map.h"
#define BOOST_TEST_MODULE SpaceMapDiskTests
#include <boost/test/included/unit_test.hpp>
using namespace std;
using namespace boost;
using namespace persistent_data;
//----------------------------------------------------------------
namespace {
	block_address const NR_BLOCKS = 1023;
	block_address const SUPERBLOCK = 0;
	unsigned const BLOCK_SIZE = 4096;

	// Builds a transaction manager over ./test.data.  The in-core space
	// map tracks exactly as many blocks as the block manager holds; it
	// used to be sized 1024, one more than the NR_BLOCKS blocks that
	// actually exist.
	transaction_manager<BLOCK_SIZE>::ptr
	create_tm() {
		block_manager<BLOCK_SIZE>::ptr bm(
			new block_manager<BLOCK_SIZE>("./test.data", NR_BLOCKS));
		space_map::ptr sm(new core_map(NR_BLOCKS));
		transaction_manager<BLOCK_SIZE>::ptr tm(
			new transaction_manager<BLOCK_SIZE>(bm, sm));
		return tm;
	}

	// Creates a fresh on-disk space map covering NR_BLOCKS blocks.
	persistent_space_map::ptr
	create_sm_disk() {
		auto tm = create_tm();
		return persistent_data::create_disk_sm<BLOCK_SIZE>(tm, NR_BLOCKS);
	}
}
//----------------------------------------------------------------
// NOTE(review): despite its name this only creates a space map; nothing
// is reopened yet.
BOOST_AUTO_TEST_CASE(reopen_an_sm)
{
	persistent_space_map::ptr sm = create_sm_disk();
}
// A new map reports the size it was created with.
BOOST_AUTO_TEST_CASE(test_get_nr_blocks)
{
	persistent_space_map::ptr sm = create_sm_disk();
	BOOST_CHECK_EQUAL(sm->get_nr_blocks(), NR_BLOCKS);
}
// The free count falls by one per allocation and rises by one per release.
BOOST_AUTO_TEST_CASE(test_get_nr_free)
{
	persistent_space_map::ptr sm = create_sm_disk();
	BOOST_CHECK_EQUAL(sm->get_nr_free(), NR_BLOCKS);

	// Allocate every block.
	for (block_address allocated = 1; allocated <= NR_BLOCKS; allocated++) {
		sm->new_block();
		BOOST_CHECK_EQUAL(sm->get_nr_free(), NR_BLOCKS - allocated);
	}

	// Release them again, lowest address first.
	for (block_address b = 0; b < NR_BLOCKS; b++) {
		sm->dec(b);
		BOOST_CHECK_EQUAL(sm->get_nr_free(), b + 1);
	}
}
// Once every block is allocated, a further new_block() must throw.
BOOST_AUTO_TEST_CASE(test_throws_no_space)
{
	persistent_space_map::ptr sm = create_sm_disk();

	block_address remaining = NR_BLOCKS;
	while (remaining > 0) {
		sm->new_block();
		remaining--;
	}

	BOOST_CHECK_THROW(sm->new_block(), std::runtime_error);
}
// Counts a single block up to 50 and back down, checking the count before
// every step.
BOOST_AUTO_TEST_CASE(test_inc_and_dec)
{
	persistent_space_map::ptr sm = create_sm_disk();
	block_address const b = 63;
	unsigned const ROUNDS = 50;

	unsigned expected = 0;
	while (expected < ROUNDS) {
		BOOST_CHECK_EQUAL(sm->get_count(b), expected);
		sm->inc(b);
		expected++;
	}

	while (expected > 0) {
		BOOST_CHECK_EQUAL(sm->get_count(b), expected);
		sm->dec(b);
		expected--;
	}
}
// Allocates until the map is exhausted and checks that the first block
// handed out is never handed out again.
BOOST_AUTO_TEST_CASE(test_not_allocated_twice)
{
	auto sm = create_sm_disk();
	block_address b = sm->new_block();

	try {
		for (;;)
			BOOST_CHECK(sm->new_block() != b);

	} catch (std::runtime_error const &) {
		// Expected: the space map ran out of blocks.  Only this
		// exception type ends the loop quietly; anything else is
		// reported by the test framework instead of being swallowed.
	}
}
// A count set explicitly (here above the 2-bit bitmap range) reads back
// unchanged.
BOOST_AUTO_TEST_CASE(test_set_count)
{
	persistent_space_map::ptr sm = create_sm_disk();
	block_address const b = 43;

	sm->set_count(b, 5);
	BOOST_CHECK_EQUAL(sm->get_count(b), 5);
}
//----------------------------------------------------------------

View File

@ -11,11 +11,6 @@ unsigned const NR_BLOCKS = 1024;
//---------------------------------------------------------------- //----------------------------------------------------------------
namespace {
}
//----------------------------------------------------------------
BOOST_AUTO_TEST_CASE(test_get_nr_blocks) BOOST_AUTO_TEST_CASE(test_get_nr_blocks)
{ {
core_map sm(NR_BLOCKS); core_map sm(NR_BLOCKS);

View File

@ -11,7 +11,7 @@
namespace persistent_data { namespace persistent_data {
template <uint32_t MetadataBlockSize> template <uint32_t MetadataBlockSize>
class transaction_manager : public boost::noncopyable { class transaction_manager : boost::noncopyable {
public: public:
typedef boost::shared_ptr<transaction_manager<MetadataBlockSize> > ptr; typedef boost::shared_ptr<transaction_manager<MetadataBlockSize> > ptr;
typedef typename block_manager<MetadataBlockSize>::read_ref read_ref; typedef typename block_manager<MetadataBlockSize>::read_ref read_ref;
@ -45,6 +45,10 @@ namespace persistent_data {
return sm_; return sm_;
} }
typename block_manager<MetadataBlockSize>::ptr get_bm() {
return bm_;
}
private: private:
void add_shadow(block_address b); void add_shadow(block_address b);
void remove_shadow(block_address b); void remove_shadow(block_address b);