diff --git a/Makefile b/Makefile index c2677cd..71fb92e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,9 @@ SOURCE=\ endian_utils.cc \ error_set.cc \ metadata.cc \ - metadata_disk_structures.cc + metadata_disk_structures.cc \ + space_map_disk.cc \ + transaction_manager.cc TEST_SOURCE=\ unit-tests/block_t.cc \ @@ -48,16 +50,16 @@ thin_repair: $(OBJECTS) thin_repair.o unit-tests/block_t: unit-tests/block_t.o g++ $(CPPFLAGS) -o $@ $+ $(LIBS) -unit-tests/btree_t: unit-tests/btree_t.o +unit-tests/btree_t: unit-tests/btree_t.o $(OBJECTS) g++ $(CPPFLAGS) -o $@ $+ $(LIBS) -unit-tests/space_map_t: unit-tests/space_map_t.o +unit-tests/space_map_t: unit-tests/space_map_t.o $(OBJECTS) g++ $(CPPFLAGS) -o $@ $+ $(LIBS) unit-tests/space_map_disk_t: unit-tests/space_map_disk_t.o $(OBJECTS) g++ $(CPPFLAGS) -o $@ $+ $(LIBS) -unit-tests/transaction_manager_t: unit-tests/transaction_manager_t.o +unit-tests/transaction_manager_t: unit-tests/transaction_manager_t.o $(OBJECTS) g++ $(CPPFLAGS) -o $@ $+ $(LIBS) unit-tests/metadata_t: unit-tests/metadata_t.o $(OBJECTS) diff --git a/block.h b/block.h index bbebec5..55ad343 100644 --- a/block.h +++ b/block.h @@ -15,6 +15,8 @@ namespace persistent_data { + uint32_t const MD_BLOCK_SIZE = 4096; + class count_adjuster { public: count_adjuster(unsigned &c) @@ -32,7 +34,7 @@ namespace persistent_data { typedef uint64_t block_address; - template + template class block_manager : private boost::noncopyable { public: typedef boost::shared_ptr ptr; diff --git a/btree.h b/btree.h index 13dbc7f..94b1bac 100644 --- a/btree.h +++ b/btree.h @@ -69,7 +69,7 @@ namespace persistent_data { //------------------------------------------------ // Class that acts as an interface over the raw little endian btree // node data. - template + template class node_ref { public: explicit node_ref(block_address b, disk_node *raw); @@ -142,31 +142,30 @@ namespace persistent_data { //------------------------------------------------ // - template - node_ref - to_node(typename block_manager::read_ref &b) + template + node_ref + to_node(typename block_manager<>::read_ref &b) { // FIXME: this should return a const read_ref somehow. - return node_ref( + return node_ref( b.get_location(), reinterpret_cast( const_cast(b.data()))); } - template - node_ref - to_node(typename block_manager::write_ref &b) + template + node_ref + to_node(typename block_manager<>::write_ref &b) { - return node_ref( + return node_ref( b.get_location(), reinterpret_cast( const_cast(b.data()))); } - template class ro_spine : private noncopyable { public: - ro_spine(typename transaction_manager::ptr tm) + ro_spine(typename transaction_manager::ptr tm) : tm_(tm) { } @@ -177,22 +176,21 @@ namespace persistent_data { } template - node_ref get_node() { - return to_node(spine_.back()); + node_ref get_node() { + return to_node(spine_.back()); } private: - typename transaction_manager::ptr tm_; - std::list::read_ref> spine_; + typename transaction_manager::ptr tm_; + std::list::read_ref> spine_; }; - template class shadow_spine : private noncopyable { public: - typedef typename transaction_manager::read_ref read_ref; - typedef typename transaction_manager::write_ref write_ref; + typedef typename transaction_manager::read_ref read_ref; + typedef typename transaction_manager::write_ref write_ref; - shadow_spine(typename transaction_manager::ptr tm) + shadow_spine(typename transaction_manager::ptr tm) : tm_(tm) { } @@ -208,7 +206,7 @@ namespace persistent_data { return p.second; } - void step(typename transaction_manager::write_ref b) { + void step(typename transaction_manager::write_ref b) { spine_.push_back(b); if (spine_.size() == 1) root_ = spine_.front().get_location(); @@ -221,8 +219,8 @@ namespace persistent_data { } template - node_ref get_node() { - return to_node(spine_.back()); + node_ref get_node() { + return to_node(spine_.back()); } block_address get_block() const { @@ -233,11 +231,11 @@ namespace persistent_data { return spine_.size() > 1; } - node_ref get_parent() { + node_ref get_parent() { if (spine_.size() < 2) throw std::runtime_error("no parent"); - return to_node(spine_.front()); + return to_node(spine_.front()); } block_address get_parent_location() const { @@ -249,22 +247,22 @@ namespace persistent_data { } private: - typename transaction_manager::ptr tm_; - std::list::write_ref> spine_; + typename transaction_manager::ptr tm_; + std::list::write_ref> spine_; block_address root_; }; // FIXME: make a member of btree - template + template optional - lookup_raw(ro_spine &spine, block_address block, uint64_t key) { + lookup_raw(ro_spine &spine, block_address block, uint64_t key) { using namespace boost; typedef typename ValueTraits::value_type leaf_type; for (;;) { spine.step(block); - node_ref leaf = spine.template get_node(); + node_ref leaf = spine.template get_node(); optional mi = leaf.exact_search(key); if (!mi) @@ -273,30 +271,30 @@ namespace persistent_data { if (leaf.get_type() == btree_detail::LEAF) return optional(leaf.value_at(*mi)); - node_ref internal = spine.template get_node(); + node_ref internal = spine.template get_node(); block = internal.value_at(*mi); } } } - template + template class btree { public: - typedef boost::shared_ptr > ptr; + typedef boost::shared_ptr > ptr; typedef uint64_t key[Levels]; typedef typename ValueTraits::value_type value_type; typedef boost::optional maybe_value; typedef boost::optional > maybe_pair; - typedef typename block_manager::read_ref read_ref; - typedef typename block_manager::write_ref write_ref; - typedef typename btree_detail::node_ref leaf_node; - typedef typename btree_detail::node_ref internal_node; + typedef typename block_manager<>::read_ref read_ref; + typedef typename block_manager<>::write_ref write_ref; + typedef typename btree_detail::node_ref leaf_node; + typedef typename btree_detail::node_ref internal_node; - btree(typename persistent_data::transaction_manager::ptr tm, + btree(typename persistent_data::transaction_manager::ptr tm, typename ValueTraits::ref_counter rc); - btree(typename transaction_manager::ptr tm, + btree(typename transaction_manager::ptr tm, block_address root, typename ValueTraits::ref_counter rc); @@ -337,22 +335,22 @@ namespace persistent_data { private: template - void split_node(btree_detail::shadow_spine &spine, + void split_node(btree_detail::shadow_spine &spine, block_address parent_index, uint64_t key, bool top); template - void split_beneath(btree_detail::shadow_spine &spine, uint64_t key); + void split_beneath(btree_detail::shadow_spine &spine, uint64_t key); template - void split_sibling(btree_detail::shadow_spine &spine, + void split_sibling(btree_detail::shadow_spine &spine, block_address parent_index, uint64_t key); template bool - insert_location(btree_detail::shadow_spine &spine, + insert_location(btree_detail::shadow_spine &spine, block_address block, uint64_t key, int *index); @@ -361,7 +359,7 @@ namespace persistent_data { unsigned level, bool is_root, block_address b) const; - typename persistent_data::transaction_manager::ptr tm_; + typename persistent_data::transaction_manager::ptr tm_; bool destroy_; block_address root_; NoOpRefCounter internal_rc_; diff --git a/btree.tcc b/btree.tcc index 5d9dca3..3ff83b9 100644 --- a/btree.tcc +++ b/btree.tcc @@ -9,23 +9,23 @@ using namespace std; //---------------------------------------------------------------- -template -node_ref::node_ref(block_address location, disk_node *raw) +template +node_ref::node_ref(block_address location, disk_node *raw) : location_(location), raw_(raw) { } -template +template block_address -node_ref::get_block_nr() const +node_ref::get_block_nr() const { return to_cpu(raw_->header.blocknr); } -template +template btree_detail::node_type -node_ref::get_type() const +node_ref::get_type() const { uint32_t flags = to_cpu(raw_->header.flags); if (flags & INTERNAL_NODE) @@ -36,9 +36,9 @@ node_ref::get_type() const throw runtime_error("unknow node type"); } -template +template void -node_ref::set_type(node_type t) +node_ref::set_type(node_type t) { uint32_t flags = to_cpu(raw_->header.flags); switch (t) { @@ -53,67 +53,67 @@ node_ref::set_type(node_type t) raw_->header.flags = to_disk<__le32>(flags); } -template +template unsigned -node_ref::get_nr_entries() const +node_ref::get_nr_entries() const { return to_cpu(raw_->header.nr_entries); } -template +template void -node_ref::set_nr_entries(unsigned n) +node_ref::set_nr_entries(unsigned n) { raw_->header.nr_entries = to_disk<__le32>(n); } -template +template unsigned -node_ref::get_max_entries() const +node_ref::get_max_entries() const { return to_cpu(raw_->header.max_entries); } -template +template void -node_ref::set_max_entries(unsigned n) +node_ref::set_max_entries(unsigned n) { raw_->header.max_entries = to_disk<__le32>(n); } -template +template void -node_ref::set_max_entries() +node_ref::set_max_entries() { set_max_entries(calc_max_entries()); } -template +template size_t -node_ref::get_value_size() const +node_ref::get_value_size() const { return to_cpu(raw_->header.value_size); } -template +template uint64_t -node_ref::key_at(unsigned i) const +node_ref::key_at(unsigned i) const { if (i >= get_nr_entries()) throw runtime_error("key index out of bounds"); return to_cpu(raw_->keys[i]); } -template +template void -node_ref::set_key(unsigned i, uint64_t k) +node_ref::set_key(unsigned i, uint64_t k) { raw_->keys[i] = to_disk<__le64>(k); } -template +template typename ValueTraits::value_type -node_ref::value_at(unsigned i) const +node_ref::value_at(unsigned i) const { if (i >= get_nr_entries()) throw runtime_error("value index out of bounds"); @@ -127,21 +127,21 @@ node_ref::value_at(unsigned i) const return v; } -template +template void -node_ref::set_value(unsigned i, - typename ValueTraits::value_type const &v) +node_ref::set_value(unsigned i, + typename ValueTraits::value_type const &v) { typename ValueTraits::disk_type d; ValueTraits::pack(v, d); ::memcpy(value_ptr(i), &d, sizeof(d)); } -template +template void -node_ref::insert_at(unsigned i, - uint64_t key, - typename ValueTraits::value_type const &v) +node_ref::insert_at(unsigned i, + uint64_t key, + typename ValueTraits::value_type const &v) { unsigned n = get_nr_entries(); if ((n + 1) > get_max_entries()) @@ -153,21 +153,21 @@ node_ref::insert_at(unsigned i, overwrite_at(i, key, v); } -template +template void -node_ref::overwrite_at(unsigned i, - uint64_t key, - typename ValueTraits::value_type const &v) +node_ref::overwrite_at(unsigned i, + uint64_t key, + typename ValueTraits::value_type const &v) { set_key(i, key); set_value(i, v); } -template +template void -node_ref::copy_entries(node_ref const &rhs, - unsigned begin, - unsigned end) +node_ref::copy_entries(node_ref const &rhs, + unsigned begin, + unsigned end) { unsigned count = end - begin; unsigned n = get_nr_entries(); @@ -179,10 +179,9 @@ node_ref::copy_entries(node_ref const &rhs, ::memcpy(value_ptr(n), rhs.value_ptr(begin), sizeof(typename ValueTraits::disk_type) * count); } -template +template int -node_ref::bsearch(uint64_t key, - int want_hi) const +node_ref::bsearch(uint64_t key, int want_hi) const { int lo = -1, hi = get_nr_entries(); @@ -202,9 +201,9 @@ node_ref::bsearch(uint64_t key, return want_hi ? hi : lo; } -template +template optional -node_ref::exact_search(uint64_t key) const +node_ref::exact_search(uint64_t key) const { int i = bsearch(key, 0); if (i < 0 || static_cast(i) >= get_nr_entries()) @@ -213,45 +212,45 @@ node_ref::exact_search(uint64_t key) const return optional(i); } -template +template int -node_ref::lower_bound(uint64_t key) const +node_ref::lower_bound(uint64_t key) const { return bsearch(key, 0); } -template +template unsigned -node_ref::calc_max_entries(void) +node_ref::calc_max_entries(void) { uint32_t total; // key + value size_t elt_size = sizeof(uint64_t) + sizeof(typename ValueTraits::disk_type); - total = (BlockSize - sizeof(struct node_header)) / elt_size; + total = (MD_BLOCK_SIZE - sizeof(struct node_header)) / elt_size; return (total / 3) * 3; // rounds down } -template +template void * -node_ref::key_ptr(unsigned i) const +node_ref::key_ptr(unsigned i) const { return raw_->keys + i; } -template +template void * -node_ref::value_ptr(unsigned i) const +node_ref::value_ptr(unsigned i) const { void *value_base = &raw_->keys[to_cpu(raw_->header.max_entries)]; return static_cast(value_base) + sizeof(typename ValueTraits::disk_type) * i; } -template +template template void -node_ref::inc_children(RefCounter &rc) +node_ref::inc_children(RefCounter &rc) { unsigned nr_entries = get_nr_entries(); for (unsigned i = 0; i < nr_entries; i++) { @@ -265,9 +264,9 @@ node_ref::inc_children(RefCounter &rc) //---------------------------------------------------------------- -template -btree:: -btree(typename transaction_manager::ptr tm, +template +btree:: +btree(typename transaction_manager::ptr tm, typename ValueTraits::ref_counter rc) : tm_(tm), destroy_(false), @@ -277,7 +276,7 @@ btree(typename transaction_manager::ptr tm, write_ref root = tm_->new_block(); - leaf_node n = to_node(root); + leaf_node n = to_node(root); n.set_type(btree_detail::LEAF); n.set_nr_entries(0); n.set_max_entries(); @@ -285,9 +284,9 @@ btree(typename transaction_manager::ptr tm, root_ = root.get_location(); } -template -btree:: -btree(typename transaction_manager::ptr tm, +template +btree:: +btree(typename transaction_manager::ptr tm, block_address root, typename ValueTraits::ref_counter rc) : tm_(tm), @@ -297,54 +296,54 @@ btree(typename transaction_manager::ptr tm, { } -template -btree::~btree() +template +btree::~btree() { } -template -typename btree::maybe_value -btree::lookup(key const &key) const +template +typename btree::maybe_value +btree::lookup(key const &key) const { using namespace btree_detail; - ro_spine spine(tm_); + ro_spine spine(tm_); block_address root = root_; for (unsigned level = 0; level < Levels - 1; ++level) { optional mroot = - lookup_raw(spine, root, key[level]); + lookup_raw(spine, root, key[level]); if (!mroot) return maybe_value(); root = *mroot; } - return lookup_raw(spine, root, key[Levels - 1]); + return lookup_raw(spine, root, key[Levels - 1]); } -template -typename btree::maybe_pair -btree::lookup_le(key const &key) const +template +typename btree::maybe_pair +btree::lookup_le(key const &key) const { using namespace btree_detail; return maybe_pair(); } -template -typename btree::maybe_pair -btree::lookup_ge(key const &key) const +template +typename btree::maybe_pair +btree::lookup_ge(key const &key) const { using namespace btree_detail; return maybe_pair(); } -template +template void -btree:: +btree:: insert(key const &key, typename ValueTraits::value_type const &value) { @@ -352,14 +351,14 @@ insert(key const &key, block_address block = root_; int index = 0; // FIXME: ??? - shadow_spine spine(tm_); + shadow_spine spine(tm_); for (unsigned level = 0; level < Levels - 1; ++level) { bool need_insert = insert_location(spine, block, key[level], &index); internal_node n = spine.template get_node(); if (need_insert) { - btree new_tree(tm_, rc_); + btree new_tree(tm_, rc_); n.insert_at(index, key[level], new_tree.get_root()); } @@ -376,78 +375,78 @@ insert(key const &key, n.set_value(index, value); } -template +template void -btree::remove(key const &key) +btree::remove(key const &key) { using namespace btree_detail; } -template +template block_address -btree::get_root() const +btree::get_root() const { return root_; } -template +template void -btree::set_root(block_address root) +btree::set_root(block_address root) { using namespace btree_detail; root_ = root; } -template -typename btree::ptr -btree::clone() const +template +typename btree::ptr +btree::clone() const { using namespace btree_detail; - ro_spine spine(tm_); + ro_spine spine(tm_); spine.step(root_); write_ref new_root = tm_->new_block(); internal_node o = spine.template get_node(); if (o.get_type() == INTERNAL) { - internal_node n = to_node(new_root); - ::memcpy(n.raw(), o.raw(), BlockSize); + internal_node n = to_node(new_root); + ::memcpy(n.raw(), o.raw(), MD_BLOCK_SIZE); typename uint64_traits::ref_counter rc(internal_rc_); n.inc_children(rc); } else { - leaf_node n = to_node(new_root); - ::memcpy(n.raw(), o.raw(), BlockSize); + leaf_node n = to_node(new_root); + ::memcpy(n.raw(), o.raw(), MD_BLOCK_SIZE); typename ValueTraits::ref_counter rc(rc_); n.inc_children(rc); } - return btree::ptr( - new btree( + return btree::ptr( + new btree( tm_, new_root.get_location(), rc_)); } #if 0 -template +template void -btree::destroy() +btree::destroy() { using namespace btree_detail; } #endif -template +template template void -btree:: -split_node(btree_detail::shadow_spine &spine, +btree:: +split_node(btree_detail::shadow_spine &spine, block_address parent_index, uint64_t key, bool top) { - node_ref n = spine.template get_node(); + node_ref n = spine.template get_node(); if (n.get_nr_entries() == n.get_max_entries()) { if (top) split_beneath(spine, key); @@ -456,11 +455,11 @@ split_node(btree_detail::shadow_spine &spine, } } -template +template template void -btree:: -split_beneath(btree_detail::shadow_spine &spine, +btree:: +split_beneath(btree_detail::shadow_spine &spine, uint64_t key) { using namespace btree_detail; @@ -469,17 +468,17 @@ split_beneath(btree_detail::shadow_spine &spine, unsigned nr_left, nr_right; write_ref left = tm_->new_block(); - node_ref l = to_node(left); + node_ref l = to_node(left); l.set_nr_entries(0); l.set_max_entries(); write_ref right = tm_->new_block(); - node_ref r = to_node(right); + node_ref r = to_node(right); r.set_nr_entries(0); r.set_max_entries(); { - node_ref p = spine.template get_node(); + node_ref p = spine.template get_node(); nr_left = p.get_nr_entries() / 2; nr_right = p.get_nr_entries() - nr_left; type = p.get_type(); @@ -508,21 +507,21 @@ split_beneath(btree_detail::shadow_spine &spine, spine.step(right); } -template +template template void -btree:: -split_sibling(btree_detail::shadow_spine &spine, +btree:: +split_sibling(btree_detail::shadow_spine &spine, block_address parent_index, uint64_t key) { using namespace btree_detail; - node_ref l = spine.template get_node(); + node_ref l = spine.template get_node(); block_address left = spine.get_block(); write_ref right = tm_->new_block(); - node_ref r = to_node(right); + node_ref r = to_node(right); unsigned nr_left = l.get_nr_entries() / 2; unsigned nr_right = l.get_nr_entries() - nr_left; @@ -545,11 +544,11 @@ split_sibling(btree_detail::shadow_spine &spine, } // Returns true if we need a new insertion, rather than overwrite. -template +template template bool -btree:: -insert_location(btree_detail::shadow_spine &spine, +btree:: +insert_location(btree_detail::shadow_spine &spine, block_address block, uint64_t key, int *index) @@ -595,7 +594,7 @@ insert_location(btree_detail::shadow_spine &spine, top = false; } - node_ref leaf = spine.template get_node(); + node_ref leaf = spine.template get_node(); // FIXME: gross if (i < 0 || leaf.key_at(i) != key) i++; @@ -611,16 +610,16 @@ insert_location(btree_detail::shadow_spine &spine, (leaf.key_at(i) != key)); } -template +template void -btree::visit(typename visitor::ptr visitor) const +btree::visit(typename visitor::ptr visitor) const { walk_tree(visitor, 0, true, root_); } -template +template void -btree:: +btree:: walk_tree(typename visitor::ptr visitor, unsigned level, bool is_root, block_address b) const @@ -628,7 +627,7 @@ walk_tree(typename visitor::ptr visitor, using namespace btree_detail; read_ref blk = tm_->read_lock(b); - internal_node o = to_node(blk); + internal_node o = to_node(blk); if (o.get_type() == INTERNAL) { if (visitor->visit_internal(level, is_root, o)) for (unsigned i = 0; i < o.get_nr_entries(); i++) @@ -640,7 +639,7 @@ walk_tree(typename visitor::ptr visitor, walk_tree(visitor, level + 1, true, o.value_at(i)); } else { - leaf_node ov = to_node(blk); + leaf_node ov = to_node(blk); visitor->visit_leaf(level, is_root, ov); } } diff --git a/btree_validator.h b/btree_validator.h index 3a19e2f..bcb84f4 100644 --- a/btree_validator.h +++ b/btree_validator.h @@ -64,8 +64,8 @@ namespace persistent_data { // - checksum // - leaf | internal flags (this can be inferred from siblings) //---------------------------------------------------------------- - template - class btree_validator : public btree::visitor { + template + class btree_validator : public btree::visitor { public: btree_validator(block_counter &counter) : counter_(counter), @@ -73,7 +73,7 @@ namespace persistent_data { } bool visit_internal(unsigned level, bool is_root, - btree_detail::node_ref const &n) { + btree_detail::node_ref const &n) { if (already_visited(n)) return false; @@ -84,7 +84,7 @@ namespace persistent_data { } bool visit_internal_leaf(unsigned level, bool is_root, - btree_detail::node_ref const &n) { + btree_detail::node_ref const &n) { if (already_visited(n)) return false; @@ -95,7 +95,7 @@ namespace persistent_data { } bool visit_leaf(unsigned level, bool is_root, - btree_detail::node_ref const &n) { + btree_detail::node_ref const &n) { if (already_visited(n)) return false; @@ -143,7 +143,7 @@ namespace persistent_data { template void check_max_entries(node const &n) const { size_t elt_size = sizeof(uint64_t) + n.get_value_size(); - if (elt_size * n.get_max_entries() + sizeof(node_header) > BlockSize) { + if (elt_size * n.get_max_entries() + sizeof(node_header) > MD_BLOCK_SIZE) { std::ostringstream out; out << "max entries too large: " << n.get_max_entries(); errs_->add_child(out.str()); diff --git a/metadata.cc b/metadata.cc index 9f7e6ad..3b974e9 100644 --- a/metadata.cc +++ b/metadata.cc @@ -25,17 +25,17 @@ namespace { // FIXME: get the file size unsigned const NR_BLOCKS = 1024; - transaction_manager<4096>::ptr + transaction_manager::ptr open_tm(string const &dev_path) { - block_manager<4096>::ptr bm(new block_manager<4096>(dev_path, NR_BLOCKS)); + block_manager<>::ptr bm(new block_manager<>(dev_path, NR_BLOCKS)); space_map::ptr sm(new core_map(NR_BLOCKS)); - transaction_manager<4096>::ptr tm(new transaction_manager<4096>(bm, sm)); + transaction_manager::ptr tm(new transaction_manager(bm, sm)); return tm; } - superblock read_superblock(block_manager<4096>::ptr bm) { + superblock read_superblock(block_manager<>::ptr bm) { superblock sb; - block_manager<4096>::read_ref r = bm->read_lock(SUPERBLOCK_LOCATION); + block_manager<>::read_ref r = bm->read_lock(SUPERBLOCK_LOCATION); superblock_disk const *sbd = reinterpret_cast(&r.data()); superblock_traits::unpack(*sbd, sb); return sb; @@ -45,21 +45,21 @@ namespace { // devices having mappings defined, which can later be cross // referenced with the details tree. A separate block_counter is // used to later verify the data space map. - class mapping_validator : public btree_validator<2, block_traits, MD_BLOCK_SIZE> { + class mapping_validator : public btree_validator<2, block_traits> { public: typedef boost::shared_ptr ptr; mapping_validator(block_counter &metadata_counter, block_counter &data_counter) - : btree_validator<2, block_traits, MD_BLOCK_SIZE>(metadata_counter), + : btree_validator<2, block_traits>(metadata_counter), data_counter_(data_counter) { } // Sharing can only occur in level 1 nodes. // FIXME: not true once we start having held roots. bool visit_internal_leaf(unsigned level, bool is_root, - btree_detail::node_ref const &n) { + btree_detail::node_ref const &n) { - bool r = btree_validator<2, block_traits, MD_BLOCK_SIZE>::visit_internal_leaf(level, is_root, n); + bool r = btree_validator<2, block_traits>::visit_internal_leaf(level, is_root, n); if (!r && level == 0) { throw runtime_error("unexpected sharing in level 0 of mapping tree."); } @@ -71,8 +71,8 @@ namespace { } bool visit_leaf(unsigned level, bool is_root, - btree_detail::node_ref const &n) { - bool r = btree_validator<2, block_traits, MD_BLOCK_SIZE>::visit_leaf(level, is_root, n); + btree_detail::node_ref const &n) { + bool r = btree_validator<2, block_traits>::visit_leaf(level, is_root, n); if (r) for (unsigned i = 0; i < n.get_nr_entries(); i++) @@ -90,17 +90,17 @@ namespace { set devices_; }; - class details_validator : public btree_validator<1, device_details_traits, MD_BLOCK_SIZE> { + class details_validator : public btree_validator<1, device_details_traits> { public: typedef boost::shared_ptr ptr; details_validator(block_counter &counter) - : btree_validator<1, device_details_traits, MD_BLOCK_SIZE>(counter) { + : btree_validator<1, device_details_traits>(counter) { } bool visit_leaf(unsigned level, bool is_root, - btree_detail::node_ref const &n) { - bool r = btree_validator<1, device_details_traits, MD_BLOCK_SIZE>::visit_leaf(level, is_root, n); + btree_detail::node_ref const &n) { + bool r = btree_validator<1, device_details_traits>::visit_leaf(level, is_root, n); if (r) for (unsigned i = 0; i < n.get_nr_entries(); i++) @@ -196,10 +196,10 @@ thin::set_mapped_blocks(block_address count) metadata::metadata(std::string const &dev_path) : tm_(open_tm(dev_path)), sb_(read_superblock(tm_->get_bm())), - metadata_sm_(open_metadata_sm(tm_, static_cast(&sb_.metadata_space_map_root_))), - data_sm_(open_disk_sm(tm_, static_cast(&sb_.data_space_map_root_))), + metadata_sm_(open_metadata_sm(tm_, static_cast(&sb_.metadata_space_map_root_))), + data_sm_(open_disk_sm(tm_, static_cast(&sb_.data_space_map_root_))), details_(tm_, sb_.device_details_root_, device_details_traits::ref_counter()), - mappings_top_level_(tm_, sb_.data_mapping_root_, mtree_ref_counter(tm_)), + mappings_top_level_(tm_, sb_.data_mapping_root_, mtree_ref_counter(tm_)), mappings_(tm_, sb_.data_mapping_root_, block_time_ref_counter(data_sm_)) { #if 0 diff --git a/metadata.h b/metadata.h index 3c164f2..732fbcb 100644 --- a/metadata.h +++ b/metadata.h @@ -16,8 +16,6 @@ //---------------------------------------------------------------- namespace thin_provisioning { - unsigned const MD_BLOCK_SIZE = 4096; - // FIXME: don't use namespaces in a header using namespace base; using namespace persistent_data; @@ -87,10 +85,9 @@ namespace thin_provisioning { //------------------------------------------------ - template class mtree_ref_counter { public: - mtree_ref_counter(typename transaction_manager::ptr tm) + mtree_ref_counter(transaction_manager::ptr tm) : tm_(tm) { } @@ -101,14 +98,13 @@ namespace thin_provisioning { } private: - typename transaction_manager::ptr tm_; + transaction_manager::ptr tm_; }; - template struct mtree_traits { typedef base::__le64 disk_type; typedef uint64_t value_type; - typedef mtree_ref_counter ref_counter; + typedef mtree_ref_counter ref_counter; static void unpack(disk_type const &disk, value_type &value) { value = base::to_cpu(disk); @@ -146,8 +142,8 @@ namespace thin_provisioning { class metadata { public: typedef boost::shared_ptr ptr; - typedef block_manager::read_ref read_ref; - typedef block_manager::write_ref write_ref; + typedef block_manager<>::read_ref read_ref; + typedef block_manager<>::write_ref write_ref; metadata(std::string const &dev_path); ~metadata(); @@ -173,27 +169,30 @@ namespace thin_provisioning { thin::ptr open_thin(thin_dev_t); - // Validation and repair + // Validation boost::optional check(); + // Dumping metadata + void dump(); + private: friend class thin; bool device_exists(thin_dev_t dev) const; - typedef persistent_data::transaction_manager::ptr tm_ptr; + typedef persistent_data::transaction_manager::ptr tm_ptr; - typedef persistent_data::btree<1, device_details_traits, MD_BLOCK_SIZE> detail_tree; - typedef persistent_data::btree<1, mtree_traits, MD_BLOCK_SIZE> dev_tree; - typedef persistent_data::btree<2, block_traits, MD_BLOCK_SIZE> mapping_tree; - typedef persistent_data::btree<1, block_traits, MD_BLOCK_SIZE> single_mapping_tree; + typedef persistent_data::btree<1, device_details_traits> detail_tree; + typedef persistent_data::btree<1, mtree_traits> dev_tree; + typedef persistent_data::btree<2, block_traits> mapping_tree; + typedef persistent_data::btree<1, block_traits> single_mapping_tree; // Declaration order is important here tm_ptr tm_; superblock sb_; - sm_disk_detail::sm_metadata::ptr metadata_sm_; - sm_disk_detail::sm_disk::ptr data_sm_; + checked_space_map::ptr metadata_sm_; + checked_space_map::ptr data_sm_; detail_tree details_; dev_tree mappings_top_level_; mapping_tree mappings_; diff --git a/space_map_disk.cc b/space_map_disk.cc new file mode 100644 index 0000000..281fce6 --- /dev/null +++ b/space_map_disk.cc @@ -0,0 +1,513 @@ +#include "space_map_disk.h" + +#include "endian_utils.h" +#include "math_utils.h" +#include "space_map_disk_structures.h" +#include "transaction_manager.h" + +using namespace boost; +using namespace persistent_data; +using namespace std; +using namespace sm_disk_detail; + + +//---------------------------------------------------------------- + +namespace { + class bitmap { + public: + typedef transaction_manager::read_ref read_ref; + typedef transaction_manager::write_ref write_ref; + + bitmap(transaction_manager::ptr tm, + index_entry const &ie) + : tm_(tm), + ie_(ie) { + } + + ref_t lookup(unsigned b) const { + read_ref rr = tm_->read_lock(ie_.blocknr_); + void const *bits = bitmap_data(rr); + ref_t b1 = test_bit_le(bits, b * 2); + ref_t b2 = test_bit_le(bits, b * 2 + 1); + ref_t result = b2 ? 1 : 0; + result |= b1 ? 0b10 : 0; + return result; + } + + void insert(unsigned b, ref_t n) { + write_ref wr = tm_->shadow(ie_.blocknr_).first; + void *bits = bitmap_data(wr); + bool was_free = !test_bit_le(bits, b * 2) && !test_bit_le(bits, b * 2 + 1); + if (n == 1 || n == 3) + set_bit_le(bits, b * 2 + 1); + else + clear_bit_le(bits, b * 2 + 1); + + if (n == 2 || n == 3) + set_bit_le(bits, b * 2); + else + clear_bit_le(bits, b * 2); + + ie_.blocknr_ = wr.get_location(); + + if (was_free && n > 0) { + ie_.nr_free_--; + if (b == ie_.none_free_before_) + ie_.none_free_before_++; + } + + if (!was_free && n == 0) { + ie_.nr_free_++; + if (b < ie_.none_free_before_) + ie_.none_free_before_ = b; + } + } + + unsigned find_free(unsigned end) { + for (unsigned i = ie_.none_free_before_; i < end; i++) { + if (lookup(i) == 0) { + insert(i, 1); + ie_.none_free_before_ = i + 1; + return i; + } + } + + throw std::runtime_error("no free entry in bitmap"); + } + + index_entry const &get_ie() const { + return ie_; + } + + private: + void *bitmap_data(typename transaction_manager::write_ref &wr) { + bitmap_header *h = reinterpret_cast(&wr.data()[0]); + return h + 1; + } + + void const *bitmap_data(typename transaction_manager::read_ref &rr) const { + bitmap_header const *h = reinterpret_cast(&rr.data()[0]); + return h + 1; + } + + typename transaction_manager::ptr tm_; + index_entry ie_; + }; + + struct ref_count_traits { + typedef __le32 disk_type; + typedef uint32_t value_type; + typedef NoOpRefCounter ref_counter; + + static void unpack(disk_type const &d, value_type &v) { + v = to_cpu(d); + } + + static void pack(value_type const &v, disk_type &d) { + d = to_disk(v); + } + }; + + class ref_count_validator : public btree_validator<1, ref_count_traits> { + public: + typedef boost::shared_ptr ptr; + + ref_count_validator(block_counter &counter) + : btree_validator<1, ref_count_traits>(counter) { + } + }; + + class sm_disk_base : public checked_space_map { + public: + typedef boost::shared_ptr ptr; + typedef transaction_manager::read_ref read_ref; + typedef transaction_manager::write_ref write_ref; + + sm_disk_base(transaction_manager::ptr tm) + : tm_(tm), + entries_per_block_((MD_BLOCK_SIZE - sizeof(bitmap_header)) * 4), + nr_blocks_(0), + nr_allocated_(0), + ref_counts_(tm_, ref_count_traits::ref_counter()) { + } + + sm_disk_base(typename transaction_manager::ptr tm, + sm_root const &root) + : tm_(tm), + entries_per_block_((MD_BLOCK_SIZE - sizeof(bitmap_header)) * 4), + nr_blocks_(root.nr_blocks_), + nr_allocated_(root.nr_allocated_), + ref_counts_(tm_, root.ref_count_root_, ref_count_traits::ref_counter()) { + } + + block_address get_nr_blocks() const { + return nr_blocks_; + } + + block_address get_nr_free() const { + return nr_blocks_ - nr_allocated_; + } + + ref_t get_count(block_address b) const { + ref_t count = lookup_bitmap(b); + if (count == 3) + return lookup_ref_count(b); + + return count; + } + + void set_count(block_address b, ref_t c) { + ref_t old = get_count(b); + + if (c == old) + return; + + if (c > 2) { + if (old < 3) + insert_bitmap(b, 3); + insert_ref_count(b, c); + } else { + if (old > 2) + remove_ref_count(b); + insert_bitmap(b, c); + } + + if (old == 0) + nr_allocated_++; + else if (c == 0) + nr_allocated_--; + } + + void commit() { + commit_ies(); + } + + void inc(block_address b) { + // FIXME: 2 get_counts + ref_t old = get_count(b); + set_count(b, old + 1); + } + + void dec(block_address b) { + ref_t old = get_count(b); + set_count(b, old - 1); + } + + block_address new_block() { + // silly to always start searching from the + // beginning. + block_address nr_indexes = div_up(nr_blocks_, entries_per_block_); + for (block_address index = 0; index < nr_indexes; index++) { + index_entry ie = find_ie(index); + + bitmap bm(tm_, ie); + block_address b = bm.find_free((index == nr_indexes - 1) ? + nr_blocks_ % entries_per_block_ : entries_per_block_); + save_ie(b, bm.get_ie()); + nr_allocated_++; + b = (index * entries_per_block_) + b; + assert(get_count(b) == 1); + return b; + } + + throw runtime_error("out of space"); + } + + bool count_possibly_greater_than_one(block_address b) const { + return get_count(b) > 1; + } + + virtual void extend(block_address extra_blocks) { + block_address nr_blocks = nr_blocks_ + extra_blocks; + + block_address bitmap_count = div_up(nr_blocks, entries_per_block_); + block_address old_bitmap_count = div_up(nr_blocks_, entries_per_block_); + for (block_address i = old_bitmap_count; i < bitmap_count; i++) { + write_ref wr = tm_->new_block(); + + struct index_entry ie; + ie.blocknr_ = wr.get_location(); + ie.nr_free_ = i == (bitmap_count - 1) ? + (nr_blocks % entries_per_block_) : entries_per_block_; + ie.none_free_before_ = 0; + + save_ie(i, ie); + } + + nr_blocks_ = nr_blocks; + } + + virtual void check(block_counter &counter) const { + typename ref_count_validator::ptr v(new ref_count_validator(counter)); + ref_counts_.visit(v); + } + + protected: + typename transaction_manager::ptr get_tm() const { + return tm_; + } + + block_address get_nr_allocated() const { + return nr_allocated_; + } + + block_address get_ref_count_root() const { + return ref_counts_.get_root(); + } + + unsigned get_entries_per_block() const { + return entries_per_block_; + } + + private: + virtual index_entry find_ie(block_address b) const = 0; + virtual void save_ie(block_address b, struct index_entry ie) = 0; + virtual void commit_ies() = 0; + + ref_t lookup_bitmap(block_address b) const { + index_entry ie = find_ie(b / entries_per_block_); + bitmap bm(tm_, ie); + return bm.lookup(b % entries_per_block_); + } + + void insert_bitmap(block_address b, unsigned n) { + if (n > 3) + throw runtime_error("bitmap can only hold 2 bit values"); + + index_entry ie = find_ie(b / entries_per_block_); + bitmap bm(tm_, ie); + bm.insert(b % entries_per_block_, n); + save_ie(b, bm.get_ie()); + } + + ref_t lookup_ref_count(block_address b) const { + uint64_t key[1] = {b}; + optional mvalue = ref_counts_.lookup(key); + if (!mvalue) + throw runtime_error("ref count not in tree"); + return *mvalue; + } + + void insert_ref_count(block_address b, ref_t count) { + uint64_t key[1] = {b}; + ref_counts_.insert(key, count); + } + + void remove_ref_count(block_address b) { + uint64_t key[1] = {b}; + ref_counts_.remove(key); + } + + transaction_manager::ptr tm_; + uint32_t entries_per_block_; + block_address nr_blocks_; + block_address nr_allocated_; + + btree<1, ref_count_traits> ref_counts_; + }; + + class bitmap_tree_validator : public btree_validator<1, index_entry_traits> { + public: + typedef boost::shared_ptr ptr; + + bitmap_tree_validator(block_counter &counter) + : btree_validator<1, index_entry_traits>(counter) { + } + + bool visit_leaf(unsigned level, bool is_root, + btree_detail::node_ref const &n) { + bool r = btree_validator<1, index_entry_traits>::visit_leaf(level, is_root, n); + + if (r) + for (unsigned i = 0; i < n.get_nr_entries(); i++) + btree_validator<1, index_entry_traits>::get_counter().inc(n.value_at(i).blocknr_); + + return r; + } + }; + + class sm_disk : public sm_disk_base { + public: + typedef boost::shared_ptr ptr; + + sm_disk(transaction_manager::ptr tm) + : sm_disk_base(tm), + bitmaps_(sm_disk_base::get_tm(), index_entry_traits::ref_counter()) { + } + + sm_disk(transaction_manager::ptr tm, + sm_root const &root) + : sm_disk_base(tm, root), + bitmaps_(sm_disk_base::get_tm(), root.bitmap_root_, index_entry_traits::ref_counter()) { + } + + size_t root_size() { + return sizeof(sm_root_disk); + } + + void copy_root(void *dest, size_t len) { + sm_root_disk d; + sm_root v; + + if (len < sizeof(d)) + throw runtime_error("root too small"); + + v.nr_blocks_ = sm_disk_base::get_nr_blocks(); + v.nr_allocated_ = sm_disk_base::get_nr_allocated(); + v.bitmap_root_ = bitmaps_.get_root(); + v.ref_count_root_ = sm_disk_base::get_ref_count_root(); + sm_root_traits::pack(v, d); + ::memcpy(dest, &d, sizeof(d)); + } + + void check(block_counter &counter) const { + sm_disk_base::check(counter); + + typename bitmap_tree_validator::ptr v(new bitmap_tree_validator(counter)); + bitmaps_.visit(v); + } + + private: + index_entry find_ie(block_address ie_index) const { + uint64_t key[1] = {ie_index}; + optional mindex = bitmaps_.lookup(key); + if (!mindex) + throw runtime_error("Couldn't lookup bitmap"); + + return *mindex; + } + + void save_ie(block_address ie_index, struct index_entry ie) { + uint64_t key[1] = {ie_index}; + bitmaps_.insert(key, ie); + } + + void commit_ies() { + } + + btree<1, index_entry_traits> bitmaps_; + }; + + class sm_metadata : public sm_disk_base { + public: + typedef boost::shared_ptr ptr; + + sm_metadata(transaction_manager::ptr tm) + : sm_disk_base(tm), + entries_(MAX_METADATA_BITMAPS) { + // FIXME: allocate a new bitmap root + } + + sm_metadata(transaction_manager::ptr tm, + sm_root const &root) + : sm_disk_base(tm, root), + bitmap_root_(root.bitmap_root_), + entries_(MAX_METADATA_BITMAPS) { + load_ies(); + } + + size_t root_size() { + return sizeof(sm_root_disk); + } + + // FIXME: common code + void copy_root(void *dest, size_t len) { + sm_root_disk d; + sm_root v; + + if (len < sizeof(d)) + throw runtime_error("root too small"); + + v.nr_blocks_ = sm_disk_base::get_nr_blocks(); + v.nr_allocated_ = sm_disk_base::get_nr_allocated(); + v.bitmap_root_ = bitmap_root_; + v.ref_count_root_ = sm_disk_base::get_ref_count_root(); + sm_root_traits::pack(v, d); + ::memcpy(dest, &d, sizeof(d)); + } + + void check(block_counter &counter) const { + sm_disk_base::check(counter); + + counter.inc(bitmap_root_); + for (unsigned i = 0; i < entries_.size(); i++) + if (entries_[i].blocknr_ != 0) // superblock + counter.inc(entries_[i].blocknr_); + } + + private: + index_entry find_ie(block_address ie_index) const { + return entries_[ie_index]; + } + + void save_ie(block_address ie_index, struct index_entry ie) { + entries_[ie_index] = ie; + } + + void load_ies() { + typename block_manager<>::read_ref rr = + sm_disk_base::get_tm()->read_lock(bitmap_root_); + + metadata_index const *mdi = reinterpret_cast(&rr.data()); + + unsigned nr_indexes = div_up(sm_disk_base::get_nr_blocks(), + sm_disk_base::get_entries_per_block()); + for (unsigned i = 0; i < nr_indexes; i++) + index_entry_traits::unpack(*(mdi->index + i), entries_[i]); + } + + void commit_ies() { + std::pair::write_ref, bool> p = + sm_disk_base::get_tm()->shadow(bitmap_root_); + + bitmap_root_ = p.first.get_location(); + metadata_index *mdi = reinterpret_cast(&p.first.data()); + + mdi->csum_ = to_disk<__le32, uint32_t>(0); + mdi->padding_ = to_disk<__le32, uint32_t>(0); + mdi->blocknr_ = to_disk<__le64>(bitmap_root_); + + for (unsigned i = 0; i < entries_.size(); i++) + index_entry_traits::pack(entries_[i], mdi->index[i]); + } + + block_address bitmap_root_; + std::vector entries_; + }; +} + +//---------------------------------------------------------------- + +checked_space_map::ptr +persistent_data::create_disk_sm(transaction_manager::ptr tm, + block_address nr_blocks) +{ + checked_space_map::ptr sm(new sm_disk(tm)); + sm->extend(nr_blocks); + return sm; +} + +checked_space_map::ptr +persistent_data::open_disk_sm(transaction_manager::ptr tm, void *root) +{ + sm_root_disk d; + sm_root v; + + ::memcpy(&d, root, sizeof(d)); + sm_root_traits::unpack(d, v); + return checked_space_map::ptr(new sm_disk(tm, v)); +} + +checked_space_map::ptr +persistent_data::open_metadata_sm(transaction_manager::ptr tm, void * root) +{ + sm_root_disk d; + sm_root v; + + ::memcpy(&d, root, sizeof(d)); + sm_root_traits::unpack(d, v); + return checked_space_map::ptr(new sm_metadata(tm, v)); +} + +//---------------------------------------------------------------- diff --git a/space_map_disk.h b/space_map_disk.h index a5540e6..e5b0863 100644 --- a/space_map_disk.h +++ b/space_map_disk.h @@ -3,531 +3,25 @@ #include "btree_validator.h" #include "space_map.h" -#include "transaction_manager.h" -#include "endian_utils.h" -#include "space_map_disk_structures.h" -#include "math_utils.h" //---------------------------------------------------------------- namespace persistent_data { + class checked_space_map : public persistent_space_map { + public: + typedef boost::shared_ptr ptr; - namespace sm_disk_detail { - using namespace base; - using namespace persistent_data; + virtual void check(block_counter &counter) const = 0; + }; - template - class bitmap { - public: - typedef typename transaction_manager::read_ref read_ref; - typedef typename transaction_manager::write_ref write_ref; + checked_space_map::ptr + create_disk_sm(transaction_manager::ptr tm, block_address nr_blocks); - bitmap(typename transaction_manager::ptr tm, - index_entry const &ie) - : tm_(tm), - ie_(ie) { - } + checked_space_map::ptr + open_disk_sm(transaction_manager::ptr tm, void *root); - ref_t lookup(unsigned b) const { - read_ref rr = tm_->read_lock(ie_.blocknr_); - void const *bits = bitmap_data(rr); - ref_t b1 = test_bit_le(bits, b * 2); - ref_t b2 = test_bit_le(bits, b * 2 + 1); - ref_t result = b2 ? 1 : 0; - result |= b1 ? 0b10 : 0; - return result; - } - - void insert(unsigned b, ref_t n) { - write_ref wr = tm_->shadow(ie_.blocknr_).first; - void *bits = bitmap_data(wr); - bool was_free = !test_bit_le(bits, b * 2) && !test_bit_le(bits, b * 2 + 1); - if (n == 1 || n == 3) - set_bit_le(bits, b * 2 + 1); - else - clear_bit_le(bits, b * 2 + 1); - - if (n == 2 || n == 3) - set_bit_le(bits, b * 2); - else - clear_bit_le(bits, b * 2); - - ie_.blocknr_ = wr.get_location(); - - if (was_free && n > 0) { - ie_.nr_free_--; - if (b == ie_.none_free_before_) - ie_.none_free_before_++; - } - - if (!was_free && n == 0) { - ie_.nr_free_++; - if (b < ie_.none_free_before_) - ie_.none_free_before_ = b; - } - } - - unsigned find_free(unsigned end) { - for (unsigned i = ie_.none_free_before_; i < end; i++) { - if (lookup(i) == 0) { - insert(i, 1); - ie_.none_free_before_ = i + 1; - return i; - } - } - - throw std::runtime_error("no free entry in bitmap"); - } - - index_entry const &get_ie() const { - return ie_; - } - - private: - void *bitmap_data(typename transaction_manager::write_ref &wr) { - bitmap_header *h = reinterpret_cast(&wr.data()[0]); - return h + 1; - } - - void const *bitmap_data(typename transaction_manager::read_ref &rr) const { - bitmap_header const *h = reinterpret_cast(&rr.data()[0]); - return h + 1; - } - - typename transaction_manager::ptr tm_; - index_entry ie_; - }; - - struct ref_count_traits { - typedef __le32 disk_type; - typedef uint32_t value_type; - typedef NoOpRefCounter ref_counter; - - static void unpack(disk_type const &d, value_type &v) { - v = to_cpu(d); - } - - static void pack(value_type const &v, disk_type &d) { - d = to_disk(v); - } - }; - - template - class ref_count_validator : public btree_validator<1, ref_count_traits, BlockSize> { - public: - typedef boost::shared_ptr ptr; - - ref_count_validator(block_counter &counter) - : btree_validator<1, ref_count_traits, BlockSize>(counter) { - } - }; - - template - class sm_disk_base : public persistent_space_map { - public: - typedef boost::shared_ptr > ptr; - typedef typename transaction_manager::read_ref read_ref; - typedef typename transaction_manager::write_ref write_ref; - - sm_disk_base(typename transaction_manager::ptr tm) - : tm_(tm), - entries_per_block_((BlockSize - sizeof(bitmap_header)) * 4), - nr_blocks_(0), - nr_allocated_(0), - ref_counts_(tm_, ref_count_traits::ref_counter()) { - } - - sm_disk_base(typename transaction_manager::ptr tm, - sm_root const &root) - : tm_(tm), - entries_per_block_((BlockSize - sizeof(bitmap_header)) * 4), - nr_blocks_(root.nr_blocks_), - nr_allocated_(root.nr_allocated_), - ref_counts_(tm_, root.ref_count_root_, ref_count_traits::ref_counter()) { - } - - block_address get_nr_blocks() const { - return nr_blocks_; - } - - block_address get_nr_free() const { - return nr_blocks_ - nr_allocated_; - } - - ref_t get_count(block_address b) const { - ref_t count = lookup_bitmap(b); - if (count == 3) - return lookup_ref_count(b); - - return count; - } - - void set_count(block_address b, ref_t c) { - ref_t old = get_count(b); - - if (c == old) - return; - - if (c > 2) { - if (old < 3) - insert_bitmap(b, 3); - insert_ref_count(b, c); - } else { - if (old > 2) - remove_ref_count(b); - insert_bitmap(b, c); - } - - if (old == 0) - nr_allocated_++; - else if (c == 0) - nr_allocated_--; - } - - void commit() { - commit_ies(); - } - - void inc(block_address b) { - // FIXME: 2 get_counts - ref_t old = get_count(b); - set_count(b, old + 1); - } - - void dec(block_address b) { - ref_t old = get_count(b); - set_count(b, old - 1); - } - - block_address new_block() { - // silly to always start searching from the - // beginning. - block_address nr_indexes = div_up(nr_blocks_, entries_per_block_); - for (block_address index = 0; index < nr_indexes; index++) { - index_entry ie = find_ie(index); - - bitmap bm(tm_, ie); - block_address b = bm.find_free((index == nr_indexes - 1) ? - nr_blocks_ % entries_per_block_ : entries_per_block_); - save_ie(b, bm.get_ie()); - nr_allocated_++; - b = (index * entries_per_block_) + b; - assert(get_count(b) == 1); - return b; - } - - throw runtime_error("out of space"); - } - - bool count_possibly_greater_than_one(block_address b) const { - return get_count(b) > 1; - } - - virtual void extend(block_address extra_blocks) { - block_address nr_blocks = nr_blocks_ + extra_blocks; - - block_address bitmap_count = div_up(nr_blocks, entries_per_block_); - block_address old_bitmap_count = div_up(nr_blocks_, entries_per_block_); - for (block_address i = old_bitmap_count; i < bitmap_count; i++) { - write_ref wr = tm_->new_block(); - - struct index_entry ie; - ie.blocknr_ = wr.get_location(); - ie.nr_free_ = i == (bitmap_count - 1) ? - (nr_blocks % entries_per_block_) : entries_per_block_; - ie.none_free_before_ = 0; - - save_ie(i, ie); - } - - nr_blocks_ = nr_blocks; - } - - virtual void check(block_counter &counter) const { - typename ref_count_validator::ptr v(new ref_count_validator(counter)); - ref_counts_.visit(v); - } - - protected: - typename transaction_manager::ptr get_tm() const { - return tm_; - } - - block_address get_nr_allocated() const { - return nr_allocated_; - } - - block_address get_ref_count_root() const { - return ref_counts_.get_root(); - } - - unsigned get_entries_per_block() const { - return entries_per_block_; - } - - private: - virtual index_entry find_ie(block_address b) const = 0; - virtual void save_ie(block_address b, struct index_entry ie) = 0; - virtual void commit_ies() = 0; - - ref_t lookup_bitmap(block_address b) const { - index_entry ie = find_ie(b / entries_per_block_); - bitmap bm(tm_, ie); - return bm.lookup(b % entries_per_block_); - } - - void insert_bitmap(block_address b, unsigned n) { - if (n > 3) - throw runtime_error("bitmap can only hold 2 bit values"); - - index_entry ie = find_ie(b / entries_per_block_); - bitmap bm(tm_, ie); - bm.insert(b % entries_per_block_, n); - save_ie(b, bm.get_ie()); - } - - ref_t lookup_ref_count(block_address b) const { - uint64_t key[1] = {b}; - optional mvalue = ref_counts_.lookup(key); - if (!mvalue) - throw runtime_error("ref count not in tree"); - return *mvalue; - } - - void insert_ref_count(block_address b, ref_t count) { - uint64_t key[1] = {b}; - ref_counts_.insert(key, count); - } - - void remove_ref_count(block_address b) { - uint64_t key[1] = {b}; - ref_counts_.remove(key); - } - - typename transaction_manager::ptr tm_; - uint32_t entries_per_block_; - block_address nr_blocks_; - block_address nr_allocated_; - - btree<1, ref_count_traits, BlockSize> ref_counts_; - }; - - template - class bitmap_tree_validator : public btree_validator<1, index_entry_traits, BlockSize> { - public: - typedef boost::shared_ptr ptr; - - bitmap_tree_validator(block_counter &counter) - : btree_validator<1, index_entry_traits, BlockSize>(counter) { - } - - bool visit_leaf(unsigned level, bool is_root, - btree_detail::node_ref const &n) { - bool r = btree_validator<1, index_entry_traits, BlockSize>::visit_leaf(level, is_root, n); - - if (r) - for (unsigned i = 0; i < n.get_nr_entries(); i++) - btree_validator<1, index_entry_traits, BlockSize>::get_counter().inc(n.value_at(i).blocknr_); - - return r; - } - }; - - template - class sm_disk : public sm_disk_base { - public: - typedef boost::shared_ptr > ptr; - - sm_disk(typename transaction_manager::ptr tm) - : sm_disk_base(tm), - bitmaps_(sm_disk_base::get_tm(), typename index_entry_traits::ref_counter()) { - } - - sm_disk(typename transaction_manager::ptr tm, - sm_root const &root) - : sm_disk_base(tm, root), - bitmaps_(sm_disk_base::get_tm(), root.bitmap_root_, typename index_entry_traits::ref_counter()) { - } - - size_t root_size() { - return sizeof(sm_root_disk); - } - - void copy_root(void *dest, size_t len) { - sm_root_disk d; - sm_root v; - - if (len < sizeof(d)) - throw runtime_error("root too small"); - - v.nr_blocks_ = sm_disk_base::get_nr_blocks(); - v.nr_allocated_ = sm_disk_base::get_nr_allocated(); - v.bitmap_root_ = bitmaps_.get_root(); - v.ref_count_root_ = sm_disk_base::get_ref_count_root(); - sm_root_traits::pack(v, d); - ::memcpy(dest, &d, sizeof(d)); - } - - void check(block_counter &counter) const { - sm_disk_base::check(counter); - - typename bitmap_tree_validator::ptr v(new bitmap_tree_validator(counter)); - bitmaps_.visit(v); - } - - private: - index_entry find_ie(block_address ie_index) const { - uint64_t key[1] = {ie_index}; - optional mindex = bitmaps_.lookup(key); - if (!mindex) - throw runtime_error("Couldn't lookup bitmap"); - - return *mindex; - } - - void save_ie(block_address ie_index, struct index_entry ie) { - uint64_t key[1] = {ie_index}; - bitmaps_.insert(key, ie); - } - - void commit_ies() { - } - - btree<1, index_entry_traits, BlockSize> bitmaps_; - }; - - template - class sm_metadata : public sm_disk_base { - public: - typedef boost::shared_ptr > ptr; - - sm_metadata(typename transaction_manager::ptr tm) - : sm_disk_base(tm), - entries_(MAX_METADATA_BITMAPS) { - // FIXME: allocate a new bitmap root - } - - sm_metadata(typename transaction_manager::ptr tm, - sm_root const &root) - : sm_disk_base(tm, root), - bitmap_root_(root.bitmap_root_), - entries_(MAX_METADATA_BITMAPS) { - load_ies(); - } - - size_t root_size() { - return sizeof(sm_root_disk); - } - - // FIXME: common code - void copy_root(void *dest, size_t len) { - sm_root_disk d; - sm_root v; - - if (len < sizeof(d)) - throw runtime_error("root too small"); - - v.nr_blocks_ = sm_disk_base::get_nr_blocks(); - v.nr_allocated_ = sm_disk_base::get_nr_allocated(); - v.bitmap_root_ = bitmap_root_; - v.ref_count_root_ = sm_disk_base::get_ref_count_root(); - sm_root_traits::pack(v, d); - ::memcpy(dest, &d, sizeof(d)); - } - - void check(block_counter &counter) const { - sm_disk_base::check(counter); - - counter.inc(bitmap_root_); - for (unsigned i = 0; i < entries_.size(); i++) - if (entries_[i].blocknr_ != 0) // superblock - counter.inc(entries_[i].blocknr_); - } - - private: - index_entry find_ie(block_address ie_index) const { - return entries_[ie_index]; - } - - void save_ie(block_address ie_index, struct index_entry ie) { - entries_[ie_index] = ie; - } - - void load_ies() { - typename block_manager::read_ref rr = - sm_disk_base::get_tm()->read_lock(bitmap_root_); - - metadata_index const *mdi = reinterpret_cast(&rr.data()); - - unsigned nr_indexes = div_up(sm_disk_base::get_nr_blocks(), - sm_disk_base::get_entries_per_block()); - for (unsigned i = 0; i < nr_indexes; i++) - index_entry_traits::unpack(*(mdi->index + i), entries_[i]); - } - - void commit_ies() { - std::pair::write_ref, bool> p = - sm_disk_base::get_tm()->shadow(bitmap_root_); - - bitmap_root_ = p.first.get_location(); - metadata_index *mdi = reinterpret_cast(&p.first.data()); - - mdi->csum_ = to_disk<__le32, uint32_t>(0); - mdi->padding_ = to_disk<__le32, uint32_t>(0); - mdi->blocknr_ = to_disk<__le64>(bitmap_root_); - - for (unsigned i = 0; i < entries_.size(); i++) - index_entry_traits::pack(entries_[i], mdi->index[i]); - } - - block_address bitmap_root_; - std::vector entries_; - }; - } - - template - typename sm_disk_detail::sm_disk::ptr - create_disk_sm(typename transaction_manager::ptr tm, - block_address nr_blocks) - { - using namespace sm_disk_detail; - typename sm_disk_detail::sm_disk::ptr sm( - new sm_disk(tm)); - sm->extend(nr_blocks); - return sm; - } - - template - typename sm_disk_detail::sm_disk::ptr - open_disk_sm(typename transaction_manager::ptr tm, - void *root) - { - using namespace sm_disk_detail; - - sm_root_disk d; - sm_root v; - - ::memcpy(&d, root, sizeof(d)); - sm_root_traits::unpack(d, v); - return typename sm_disk::ptr( - new sm_disk(tm, v)); - } - - template - typename sm_disk_detail::sm_metadata::ptr - open_metadata_sm(typename transaction_manager::ptr tm, - void * root) - { - using namespace sm_disk_detail; - - sm_root_disk d; - sm_root v; - - ::memcpy(&d, root, sizeof(d)); - sm_root_traits::unpack(d, v); - return typename sm_metadata::ptr( - new sm_metadata(tm, v)); - } + checked_space_map::ptr + open_metadata_sm(transaction_manager::ptr tm, void * root); } //---------------------------------------------------------------- diff --git a/thin_dump.cc b/thin_dump.cc index 96df939..8ebf8f4 100644 --- a/thin_dump.cc +++ b/thin_dump.cc @@ -6,11 +6,14 @@ using namespace persistent_data; using namespace std; using namespace thin_provisioning; +//---------------------------------------------------------------- + namespace { void dump(string const &path) { metadata md(path); + human_readable::ptr emitter(new human_readable); - md.check(); + md.dump(); } void usage(string const &cmd) { @@ -29,3 +32,5 @@ int main(int argc, char **argv) return 0; } + +//---------------------------------------------------------------- diff --git a/transaction_manager.tcc b/transaction_manager.cc similarity index 51% rename from transaction_manager.tcc rename to transaction_manager.cc index 90f1d3f..99c59d5 100644 --- a/transaction_manager.tcc +++ b/transaction_manager.cc @@ -8,32 +8,27 @@ using namespace std; //---------------------------------------------------------------- -template -transaction_manager::transaction_manager(typename block_manager::ptr bm, - space_map::ptr sm) +transaction_manager::transaction_manager(typename block_manager<>::ptr bm, + space_map::ptr sm) : bm_(bm), sm_(sm) { } -template -transaction_manager::~transaction_manager() +transaction_manager::~transaction_manager() { } -template -typename transaction_manager::write_ref -transaction_manager::begin(block_address superblock) +transaction_manager::write_ref +transaction_manager::begin(block_address superblock) { write_ref wr = bm_->superblock(superblock); wipe_shadow_table(); return wr; } -template -typename transaction_manager::write_ref -transaction_manager::begin(block_address superblock, - validator v) +transaction_manager::write_ref +transaction_manager::begin(block_address superblock, validator v) { write_ref wr = bm_->superblock(superblock, v); wipe_shadow_table(); @@ -41,9 +36,8 @@ transaction_manager::begin(block_address superblock, } // FIXME: these explicit try/catches are gross -template -typename transaction_manager::write_ref -transaction_manager::new_block() +transaction_manager::write_ref +transaction_manager::new_block() { block_address b = sm_->new_block(); try { @@ -61,9 +55,8 @@ transaction_manager::new_block() } } -template -typename transaction_manager::write_ref -transaction_manager::new_block(validator v) +transaction_manager::write_ref +transaction_manager::new_block(validator v) { block_address b = sm_->new_block(); try { @@ -81,9 +74,8 @@ transaction_manager::new_block(validator v) } // FIXME: make exception safe -template -pair::write_ref, bool> -transaction_manager::shadow(block_address orig) +pair +transaction_manager::shadow(block_address orig) { if (is_shadow(orig) && !sm_->count_possibly_greater_than_one(orig)) @@ -91,7 +83,7 @@ transaction_manager::shadow(block_address orig) read_ref src = bm_->read_lock(orig); write_ref dest = bm_->write_lock_zero(sm_->new_block()); - ::memcpy(dest.data(), src.data(), BlockSize); + ::memcpy(dest.data(), src.data(), MD_BLOCK_SIZE); ref_t count = sm_->get_count(orig); if (count == 0) @@ -102,9 +94,8 @@ transaction_manager::shadow(block_address orig) } // FIXME: duplicate code -template -pair::write_ref, bool> -transaction_manager::shadow(block_address orig, validator v) +pair +transaction_manager::shadow(block_address orig, validator v) { if (is_shadow(orig) && sm_->count_possibly_greater_than_one(orig)) @@ -112,54 +103,48 @@ transaction_manager::shadow(block_address orig, validator v) read_ref src = bm_->read_lock(orig, v); write_ref dest = bm_->write_lock_zero(sm_->new_block(), v); - ::memcpy(dest->data_, src->data_, BlockSize); + ::memcpy(dest.data(), src.data(), MD_BLOCK_SIZE); ref_t count = sm_->get_count(orig); if (count == 0) throw runtime_error("shadowing free block"); sm_->dec(orig); - add_shadow(dest->location_); + add_shadow(dest.get_location()); return make_pair(dest, count > 1); } -template -typename transaction_manager::read_ref -transaction_manager::read_lock(block_address b) +transaction_manager::read_ref +transaction_manager::read_lock(block_address b) { return bm_->read_lock(b); } -template -typename transaction_manager::read_ref -transaction_manager::read_lock(block_address b, validator v) +transaction_manager::read_ref +transaction_manager::read_lock(block_address b, validator v) { return bm_->read_lock(b, v); } -template void -transaction_manager::add_shadow(block_address b) +transaction_manager::add_shadow(block_address b) { shadows_.insert(b); } -template void -transaction_manager::remove_shadow(block_address b) +transaction_manager::remove_shadow(block_address b) { shadows_.erase(b); } -template bool -transaction_manager::is_shadow(block_address b) const +transaction_manager::is_shadow(block_address b) const { return shadows_.count(b) > 0; } -template void -transaction_manager::wipe_shadow_table() +transaction_manager::wipe_shadow_table() { shadows_.clear(); } diff --git a/transaction_manager.h b/transaction_manager.h index 859f082..7a856bd 100644 --- a/transaction_manager.h +++ b/transaction_manager.h @@ -10,19 +10,18 @@ //---------------------------------------------------------------- namespace persistent_data { - template class transaction_manager : boost::noncopyable { public: - typedef boost::shared_ptr > ptr; - typedef typename block_manager::read_ref read_ref; - typedef typename block_manager::write_ref write_ref; - typedef typename block_manager::validator::ptr validator; + typedef boost::shared_ptr ptr; + typedef typename block_manager<>::read_ref read_ref; + typedef typename block_manager<>::write_ref write_ref; + typedef typename block_manager<>::validator::ptr validator; // If the space map is persistent, then the caller should // hold onto a reference and remember to call sm_->commit() // and update the superblock before dropping the superblock // reference. - transaction_manager(typename block_manager::ptr bm, + transaction_manager(typename block_manager<>::ptr bm, space_map::ptr sm); ~transaction_manager(); @@ -45,7 +44,7 @@ namespace persistent_data { return sm_; } - typename block_manager::ptr get_bm() { + typename block_manager<>::ptr get_bm() { return bm_; } @@ -55,15 +54,13 @@ namespace persistent_data { bool is_shadow(block_address b) const; void wipe_shadow_table(); - typename block_manager::ptr bm_; + typename block_manager<>::ptr bm_; space_map::ptr sm_; std::set shadows_; }; } -#include "transaction_manager.tcc" - //---------------------------------------------------------------- #endif diff --git a/unit-tests/btree_t.cc b/unit-tests/btree_t.cc index 776927e..997ef68 100644 --- a/unit-tests/btree_t.cc +++ b/unit-tests/btree_t.cc @@ -14,40 +14,39 @@ using namespace persistent_data; namespace { block_address const NR_BLOCKS = 102400; - transaction_manager<4096>::ptr + transaction_manager::ptr create_tm() { - block_manager<4096>::ptr bm(new block_manager<4096>("./test.data", NR_BLOCKS)); + block_manager<>::ptr bm(new block_manager<>("./test.data", NR_BLOCKS)); space_map::ptr sm(new core_map(NR_BLOCKS)); - transaction_manager<4096>::ptr tm(new transaction_manager<4096>(bm, sm)); + transaction_manager::ptr tm(new transaction_manager(bm, sm)); return tm; } - btree<1, uint64_traits, 4096>::ptr + btree<1, uint64_traits>::ptr create_btree() { uint64_traits::ref_counter rc; - return btree<1, uint64_traits, 4096>::ptr( - new btree<1, uint64_traits, 4096>( - create_tm(), rc)); + return btree<1, uint64_traits>::ptr( + new btree<1, uint64_traits>(create_tm(), rc)); } // Checks that a btree is well formed. // // i) No block should be in the tree more than once. // - class constraint_visitor : public btree<1, uint64_traits, 4096>::visitor { + class constraint_visitor : public btree<1, uint64_traits>::visitor { public: - bool visit_internal(unsigned level, bool is_root, btree_detail::node_ref const &n) { + bool visit_internal(unsigned level, bool is_root, btree_detail::node_ref const &n) { check_duplicate_block(n.get_location()); return true; } - bool visit_internal_leaf(unsigned level, bool is_root, btree_detail::node_ref const &n) { + bool visit_internal_leaf(unsigned level, bool is_root, btree_detail::node_ref const &n) { check_duplicate_block(n.get_location()); return true; } - bool visit_leaf(unsigned level, bool is_root, btree_detail::node_ref const &n) { + bool visit_leaf(unsigned level, bool is_root, btree_detail::node_ref const &n) { check_duplicate_block(n.get_location()); return true; } @@ -66,8 +65,8 @@ namespace { set seen_; }; - void check_constraints(btree<1, uint64_traits, 4096>::ptr tree) { - typedef btree<1, uint64_traits, 4096> tree_type; + void check_constraints(btree<1, uint64_traits>::ptr tree) { + typedef btree<1, uint64_traits> tree_type; tree_type::visitor::ptr v(new constraint_visitor); tree->visit(v); diff --git a/unit-tests/space_map_disk_t.cc b/unit-tests/space_map_disk_t.cc index 6977a1e..e91b525 100644 --- a/unit-tests/space_map_disk_t.cc +++ b/unit-tests/space_map_disk_t.cc @@ -13,22 +13,21 @@ using namespace persistent_data; namespace { block_address const NR_BLOCKS = 10237; block_address const SUPERBLOCK = 0; - unsigned const BLOCK_SIZE = 4096; - transaction_manager::ptr + transaction_manager::ptr create_tm() { - block_manager::ptr bm( - new block_manager("./test.data", NR_BLOCKS)); + block_manager<>::ptr bm( + new block_manager<>("./test.data", NR_BLOCKS)); space_map::ptr sm(new core_map(1024)); - transaction_manager::ptr tm( - new transaction_manager(bm, sm)); + transaction_manager::ptr tm( + new transaction_manager(bm, sm)); return tm; } persistent_space_map::ptr create_sm_disk() { auto tm = create_tm(); - return persistent_data::create_disk_sm(tm, NR_BLOCKS); + return persistent_data::create_disk_sm(tm, NR_BLOCKS); } } @@ -136,7 +135,7 @@ BOOST_AUTO_TEST_CASE(test_reopen) { auto tm = create_tm(); - auto sm = persistent_data::open_disk_sm(tm, buffer); + auto sm = persistent_data::open_disk_sm(tm, buffer); for (unsigned i = 0, step = 1; i < NR_BLOCKS; i += step, step++) BOOST_CHECK_EQUAL(sm->get_count(i), 1); diff --git a/unit-tests/transaction_manager_t.cc b/unit-tests/transaction_manager_t.cc index 657fa5d..4a80cfa 100644 --- a/unit-tests/transaction_manager_t.cc +++ b/unit-tests/transaction_manager_t.cc @@ -13,11 +13,11 @@ using namespace persistent_data; namespace { block_address const NR_BLOCKS = 1024; - transaction_manager<4096>::ptr + transaction_manager::ptr create_tm() { - block_manager<4096>::ptr bm(new block_manager<4096>("./test.data", NR_BLOCKS)); + block_manager<>::ptr bm(new block_manager<>("./test.data", NR_BLOCKS)); space_map::ptr sm(new core_map(NR_BLOCKS)); - transaction_manager<4096>::ptr tm(new transaction_manager<4096>(bm, sm)); + transaction_manager::ptr tm(new transaction_manager(bm, sm)); tm->get_sm()->inc(0); return tm; }