Merge pull request #141 from mingnus/thin-generate-metadata-wip
Complete the major features of thin_generate_metadata, and introduce thin_generate_mappings
This commit is contained in:
commit
1fd0bfd4e8
@ -40,6 +40,7 @@ SOURCE=\
|
||||
base/error_state.cc \
|
||||
base/error_string.cc \
|
||||
base/grid_layout.cc \
|
||||
base/io_generator.cc \
|
||||
base/file_utils.cc \
|
||||
base/progress_monitor.cc \
|
||||
base/rolling_hash.cc \
|
||||
@ -128,6 +129,7 @@ DEVTOOLS_SOURCE=\
|
||||
thin-provisioning/thin_ll_restore.cc \
|
||||
thin-provisioning/thin_show_duplicates.cc \
|
||||
thin-provisioning/thin_generate_metadata.cc \
|
||||
thin-provisioning/thin_generate_mappings.cc \
|
||||
thin-provisioning/variable_chunk_stream.cc \
|
||||
thin-provisioning/thin_show_metadata.cc \
|
||||
thin-provisioning/thin_scan.cc \
|
||||
|
25
base/io.h
Normal file
25
base/io.h
Normal file
@ -0,0 +1,25 @@
|
||||
#ifndef BASE_IO_H
|
||||
#define BASE_IO_H
|
||||
|
||||
#include "base/types.h"
|
||||
#include <stdint.h>
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace base {
|
||||
enum req_op {
|
||||
REQ_OP_READ,
|
||||
REQ_OP_WRITE,
|
||||
REQ_OP_DISCARD
|
||||
};
|
||||
|
||||
struct io {
|
||||
unsigned op_;
|
||||
sector_t sector_;
|
||||
sector_t size_;
|
||||
};
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#endif
|
240
base/io_generator.cc
Normal file
240
base/io_generator.cc
Normal file
@ -0,0 +1,240 @@
|
||||
#include "base/io_generator.h"
|
||||
#include <stdexcept>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
using namespace base;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
std::pair<char const*, io_pattern::pattern> patterns[] = {
|
||||
{"read", io_pattern::READ},
|
||||
{"write", io_pattern::WRITE},
|
||||
{"trim", io_pattern::TRIM},
|
||||
{"readwrite", io_pattern::READ_WRITE},
|
||||
{"trimwrite", io_pattern::TRIM_WRITE},
|
||||
{"randread", io_pattern::RAND_READ},
|
||||
{"randwrite", io_pattern::RAND_WRITE},
|
||||
{"randtrim", io_pattern::RAND_TRIM},
|
||||
{"randrw", io_pattern::RAND_RW},
|
||||
{"randtw", io_pattern::RAND_TW}
|
||||
};
|
||||
|
||||
unsigned const nr_patterns = sizeof(patterns) / sizeof(patterns[0]);
|
||||
|
||||
//--------------------------------
|
||||
|
||||
class offset_generator {
|
||||
public:
|
||||
typedef std::shared_ptr<offset_generator> ptr;
|
||||
|
||||
virtual base::sector_t next_offset() = 0;
|
||||
};
|
||||
|
||||
class sequential_offset_generator: public offset_generator {
|
||||
public:
|
||||
sequential_offset_generator(base::sector_t offset,
|
||||
base::sector_t size,
|
||||
base::sector_t block_size)
|
||||
: block_size_(block_size),
|
||||
begin_(offset),
|
||||
end_(offset + size),
|
||||
current_(offset) {
|
||||
if (size < block_size)
|
||||
throw std::runtime_error("size must be greater than block_size");
|
||||
}
|
||||
|
||||
base::sector_t next_offset() {
|
||||
sector_t r = current_;
|
||||
current_ += block_size_;
|
||||
if (current_ > end_)
|
||||
current_ = begin_;
|
||||
return r;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned block_size_;
|
||||
base::sector_t begin_;
|
||||
base::sector_t end_;
|
||||
base::sector_t current_;
|
||||
};
|
||||
|
||||
class random_offset_generator: public offset_generator {
|
||||
public:
|
||||
random_offset_generator(sector_t offset,
|
||||
sector_t size,
|
||||
sector_t block_size)
|
||||
: block_begin_(offset / block_size),
|
||||
nr_blocks_(size / block_size),
|
||||
block_size_(block_size) {
|
||||
}
|
||||
|
||||
sector_t next_offset() {
|
||||
return ((std::rand() % nr_blocks_) + block_begin_) * block_size_;
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t block_begin_;
|
||||
uint64_t nr_blocks_;
|
||||
unsigned block_size_;
|
||||
};
|
||||
|
||||
//--------------------------------
|
||||
|
||||
class op_generator {
|
||||
public:
|
||||
typedef std::shared_ptr<op_generator> ptr;
|
||||
|
||||
op_generator(base::req_op op1)
|
||||
: op1_(op1), op2_(op1), op1_pct_(100) {
|
||||
}
|
||||
|
||||
op_generator(base::req_op op1,
|
||||
base::req_op op2,
|
||||
unsigned op1_pct)
|
||||
: op1_(op1), op2_(op2), op1_pct_(op1_pct) {
|
||||
if (op1_pct > 100)
|
||||
throw std::runtime_error("invalid percentage");
|
||||
}
|
||||
|
||||
base::req_op next_op() {
|
||||
if (static_cast<unsigned>(std::rand()) % 100 > op1_pct_)
|
||||
return op2_;
|
||||
return op1_;
|
||||
}
|
||||
|
||||
private:
|
||||
base::req_op op1_;
|
||||
base::req_op op2_;
|
||||
unsigned op1_pct_;
|
||||
};
|
||||
|
||||
//--------------------------------
|
||||
|
||||
class base_io_generator: public io_generator {
|
||||
public:
|
||||
base_io_generator(io_generator_options const &opts);
|
||||
virtual bool has_next();
|
||||
virtual void next(base::io &next_io);
|
||||
|
||||
private:
|
||||
offset_generator::ptr
|
||||
create_offset_generator(io_generator_options const &opts);
|
||||
|
||||
op_generator::ptr
|
||||
create_op_generator(io_generator_options const &opts);
|
||||
|
||||
offset_generator::ptr offset_gen_;
|
||||
op_generator::ptr op_gen_;
|
||||
sector_t block_size_;
|
||||
size_t io_size_finished_;
|
||||
size_t io_size_total_;
|
||||
};
|
||||
|
||||
// Builds the two sub-generators from the options and resets the
// progress counter.
base_io_generator::base_io_generator(io_generator_options const &opts)
	: offset_gen_(create_offset_generator(opts)), op_gen_(create_op_generator(opts)),
	  block_size_(opts.block_size_), io_size_finished_(0), io_size_total_(opts.io_size_)
{
}
|
||||
|
||||
bool base_io_generator::has_next() {
|
||||
return io_size_finished_ < io_size_total_;
|
||||
}
|
||||
|
||||
// Fills next_io with the next request (operation, offset, one block in
// size) and accounts for it.
// Throws std::runtime_error once the requested total has been issued;
// callers should test has_next() first.
void base_io_generator::next(base::io &next_io) {
	if (io_size_finished_ >= io_size_total_)
		throw std::runtime_error("io generator exhausted");

	next_io.op_ = op_gen_->next_op();
	next_io.sector_ = offset_gen_->next_offset();
	next_io.size_ = block_size_;

	io_size_finished_ += block_size_;
}
|
||||
|
||||
// Selects the random or the sequential offset generator according to
// the pattern, passing it the offset, size and block size from opts.
offset_generator::ptr
base_io_generator::create_offset_generator(io_generator_options const &opts) {
	offset_generator::ptr gen;

	if (opts.pattern_.is_random())
		gen.reset(new random_offset_generator(opts.offset_,
						      opts.size_,
						      opts.block_size_));
	else
		gen.reset(new sequential_offset_generator(opts.offset_,
							  opts.size_,
							  opts.block_size_));

	return gen;
}
|
||||
|
||||
// Maps the pattern to an op generator: single-op patterns always yield
// that op, mixed patterns split 50/50 between their two ops. The RANDOM
// variants map to the same ops as their sequential counterparts.
// Throws std::runtime_error for a pattern value not listed here.
op_generator::ptr
base_io_generator::create_op_generator(io_generator_options const &opts) {
	// FIXME: eliminate the switch-case and hide enum values
	op_generator::ptr gen;

	switch (opts.pattern_.val_) {
	case io_pattern::READ:
	case io_pattern::RAND_READ:
		gen.reset(new op_generator(base::REQ_OP_READ));
		break;

	case io_pattern::WRITE:
	case io_pattern::RAND_WRITE:
		gen.reset(new op_generator(base::REQ_OP_WRITE));
		break;

	case io_pattern::TRIM:
	case io_pattern::RAND_TRIM:
		gen.reset(new op_generator(base::REQ_OP_DISCARD));
		break;

	case io_pattern::READ_WRITE:
	case io_pattern::RAND_RW:
		gen.reset(new op_generator(base::REQ_OP_READ,
					   base::REQ_OP_WRITE,
					   50));
		break;

	case io_pattern::TRIM_WRITE:
	case io_pattern::RAND_TW:
		gen.reset(new op_generator(base::REQ_OP_DISCARD,
					   base::REQ_OP_WRITE,
					   50));
		break;

	default:
		throw std::runtime_error("unknown pattern");
	}

	return gen;
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// Default pattern is a plain (sequential) read.
io_pattern::io_pattern()
	: val_(READ)
{
}
|
||||
|
||||
// Constructs from a pattern name; delegates to parse(), so an
// unrecognised name propagates parse()'s exception.
io_pattern::io_pattern(char const *pattern)
{
	this->parse(pattern);
}
|
||||
|
||||
void
|
||||
io_pattern::parse(char const *pattern) {
|
||||
bool found = false;
|
||||
unsigned i = 0;
|
||||
for (i = 0; i < nr_patterns; i++) {
|
||||
if (!strcmp(patterns[i].first, pattern)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
throw std::runtime_error("unknow pattern");
|
||||
|
||||
val_ = patterns[i].second;
|
||||
}
|
||||
|
||||
bool
|
||||
io_pattern::is_random() const {
|
||||
return val_ & pattern::RANDOM;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// Factory: wraps a new base_io_generator configured from opts.
io_generator::ptr
base::create_io_generator(io_generator_options const &opts)
{
	io_generator::ptr gen(new base_io_generator(opts));
	return gen;
}
|
||||
|
||||
//----------------------------------------------------------------
|
55
base/io_generator.h
Normal file
55
base/io_generator.h
Normal file
@ -0,0 +1,55 @@
|
||||
#ifndef BASE_IO_GENERATOR_H
|
||||
#define BASE_IO_GENERATOR_H
|
||||
|
||||
#include "base/io.h"
|
||||
#include <memory>
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace base {
|
||||
struct io_pattern {
|
||||
enum pattern {
|
||||
READ = 1 << 1,
|
||||
WRITE = 1 << 2,
|
||||
TRIM = 1 << 3,
|
||||
RANDOM = 1 << 8,
|
||||
READ_WRITE = READ | WRITE,
|
||||
TRIM_WRITE = WRITE | TRIM,
|
||||
RAND_READ = READ | RANDOM,
|
||||
RAND_WRITE = WRITE | RANDOM,
|
||||
RAND_TRIM = TRIM | RANDOM,
|
||||
RAND_RW = READ_WRITE | RANDOM,
|
||||
RAND_TW = TRIM_WRITE | RANDOM,
|
||||
};
|
||||
|
||||
io_pattern();
|
||||
io_pattern(char const *pattern);
|
||||
void parse(char const *pattern);
|
||||
bool is_random() const;
|
||||
|
||||
pattern val_;
|
||||
};
|
||||
|
||||
struct io_generator_options {
|
||||
io_pattern pattern_;
|
||||
sector_t offset_;
|
||||
sector_t block_size_;
|
||||
sector_t size_;
|
||||
sector_t io_size_;
|
||||
};
|
||||
|
||||
class io_generator {
|
||||
public:
|
||||
typedef std::shared_ptr<io_generator> ptr;
|
||||
|
||||
virtual bool has_next() = 0;
|
||||
virtual void next(base::io &next_io) = 0;
|
||||
};
|
||||
|
||||
io_generator::ptr
|
||||
create_io_generator(io_generator_options const &opts);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#endif
|
@ -16,8 +16,8 @@
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef THINP_MATH_H
|
||||
#define THINP_MATH_H
|
||||
#ifndef BASE_MATH_H
|
||||
#define BASE_MATH_H
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
@ -34,6 +34,11 @@ namespace base {
|
||||
T div_down(T const &v, T const &divisor) {
|
||||
return v / divisor;
|
||||
}
|
||||
|
||||
// Returns true if v is an exact power of two (1, 2, 4, ...).
// Zero is rejected explicitly: the bit trick v & (v - 1) alone would
// report it as a power of two.
template <typename T>
bool is_power_of_two(T const v) {
	return v && !(v & (v - 1));
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
15
base/types.h
Normal file
15
base/types.h
Normal file
@ -0,0 +1,15 @@
|
||||
#ifndef BASE_TYPES_H
|
||||
#define BASE_TYPES_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace base {
	// Sector counts and sector offsets.
	typedef uint64_t sector_t;

	// log2 of the sector size, i.e. a shift of 9 converts between
	// sectors and bytes.
	const unsigned SECTOR_SHIFT = 9;
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#endif
|
@ -24,7 +24,6 @@ namespace bi = boost::intrusive;
|
||||
|
||||
namespace bcache {
|
||||
typedef uint64_t block_address;
|
||||
typedef uint64_t sector_t;
|
||||
|
||||
class validator {
|
||||
public:
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef BLOCK_CACHE_IO_ENGINE_H
|
||||
#define BLOCK_CACHE_IO_ENGINE_H
|
||||
|
||||
#include "base/types.h"
|
||||
#include "base/unique_handle.h"
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
@ -18,9 +19,8 @@
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace bcache {
|
||||
using sector_t = uint64_t;
|
||||
|
||||
unsigned const SECTOR_SHIFT = 9;
|
||||
using base::sector_t;
|
||||
using base::SECTOR_SHIFT;
|
||||
|
||||
// Virtual base class to aid unit testing
|
||||
class io_engine {
|
||||
|
@ -34,8 +34,6 @@ namespace cache {
|
||||
|
||||
block_address const SUPERBLOCK_LOCATION = 0;
|
||||
|
||||
typedef uint64_t sector_t;
|
||||
|
||||
//------------------------------------------------
|
||||
|
||||
class space_map_ref_counter {
|
||||
|
@ -19,7 +19,7 @@
|
||||
#ifndef ARRAY_H
|
||||
#define ARRAY_H
|
||||
|
||||
#include "persistent-data/math_utils.h"
|
||||
#include "base/math_utils.h"
|
||||
#include "persistent-data/data-structures/btree.h"
|
||||
#include "persistent-data/data-structures/btree_counter.h"
|
||||
#include "persistent-data/data-structures/btree_damage_visitor.h"
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include "persistent-data/data-structures/array.h"
|
||||
#include "persistent-data/data-structures/bitset.h"
|
||||
#include "persistent-data/math_utils.h"
|
||||
#include "base/math_utils.h"
|
||||
|
||||
using namespace persistent_data;
|
||||
using namespace persistent_data::bitset_detail;
|
||||
|
373
persistent-data/data-structures/btree-remove.tcc
Normal file
373
persistent-data/data-structures/btree-remove.tcc
Normal file
@ -0,0 +1,373 @@
|
||||
// This file is part of the thin-provisioning-tools source.
|
||||
//
|
||||
// thin-provisioning-tools is free software: you can redistribute it
|
||||
// and/or modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation, either version 3 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// thin-provisioning-tools is distributed in the hope that it will be
|
||||
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace persistent_data {
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
btree_detail::shadow_child
|
||||
btree<Levels, ValueTraits>::
|
||||
create_shadow_child(internal_node &parent,
|
||||
unsigned index)
|
||||
{
|
||||
block_address b = parent.value_at(index);
|
||||
|
||||
pair<write_ref, bool> p = tm_.shadow(b, validator_);
|
||||
write_ref &wr = p.first;
|
||||
btree_detail::node_type type;
|
||||
|
||||
node_ref<block_traits> n = to_node<block_traits>(wr);
|
||||
if (n.get_type() == btree_detail::INTERNAL) {
|
||||
type = btree_detail::INTERNAL;
|
||||
if (p.second)
|
||||
n.inc_children(internal_rc_);
|
||||
} else {
|
||||
type = btree_detail::LEAF;
|
||||
if (p.second) {
|
||||
node_ref<ValueTraits> leaf = to_node<ValueTraits>(wr);
|
||||
leaf.inc_children(rc_);
|
||||
}
|
||||
}
|
||||
|
||||
parent.set_value(index, wr.get_location());
|
||||
|
||||
return btree_detail::shadow_child(wr, type);
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::
|
||||
remove(key const &key)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
block_address block = root_;
|
||||
unsigned index = 0;
|
||||
shadow_spine spine(tm_, validator_);
|
||||
bool need_remove = true;
|
||||
|
||||
for (unsigned level = 0; level < Levels - 1; ++level) {
|
||||
need_remove = remove_location<block_traits>(spine, block,
|
||||
key[level], &index,
|
||||
internal_rc_);
|
||||
if (!need_remove)
|
||||
break;
|
||||
|
||||
internal_node n = spine.get_node<block_traits>();
|
||||
block = n.value_at(index);
|
||||
}
|
||||
|
||||
if (need_remove) {
|
||||
need_remove = remove_location<ValueTraits>(spine, block,
|
||||
key[Levels - 1], &index,
|
||||
rc_);
|
||||
if (need_remove) {
|
||||
leaf_node leaf = spine.get_node<ValueTraits>();
|
||||
leaf.delete_at(index);
|
||||
}
|
||||
}
|
||||
|
||||
root_ = spine.get_root();
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits, typename RC>
|
||||
bool
|
||||
btree<Levels, _>::
|
||||
remove_location(btree_detail::shadow_spine &spine,
|
||||
block_address block,
|
||||
uint64_t key,
|
||||
unsigned *index,
|
||||
RC &leaf_rc)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
unsigned i = 0;
|
||||
bool r = false;
|
||||
|
||||
for (;;) {
|
||||
r = spine.step(block);
|
||||
|
||||
// patch up the parent to point to the new shadow
|
||||
if (spine.has_parent()) {
|
||||
internal_node p = spine.get_parent();
|
||||
p.set_value(i, spine.get_block());
|
||||
}
|
||||
|
||||
internal_node n = spine.get_node<block_traits>();
|
||||
if (n.get_type() == btree_detail::LEAF) {
|
||||
node_ref<ValueTraits> leaf = spine.get_node<ValueTraits>();
|
||||
boost::optional<unsigned> idx = leaf.exact_search(key);
|
||||
if (!idx)
|
||||
return false;
|
||||
*index = *idx;
|
||||
return true;
|
||||
}
|
||||
|
||||
r = rebalance_children<ValueTraits>(spine, key);
|
||||
if (!r)
|
||||
break;
|
||||
|
||||
n = spine.get_node<block_traits>();
|
||||
if (n.get_type() == btree_detail::LEAF) {
|
||||
node_ref<ValueTraits> leaf = spine.get_node<ValueTraits>();
|
||||
boost::optional<unsigned> idx = leaf.exact_search(key);
|
||||
if (!idx)
|
||||
return false;
|
||||
*index = *idx;
|
||||
return true;
|
||||
}
|
||||
|
||||
i = n.lower_bound(key);
|
||||
block = n.value_at(i);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
bool
|
||||
btree<Levels, _>::
|
||||
rebalance_children(btree_detail::shadow_spine &spine, uint64_t key)
|
||||
{
|
||||
internal_node n = spine.get_node<block_traits>();
|
||||
|
||||
if (n.get_nr_entries() == 1) {
|
||||
block_address b = n.value_at(0);
|
||||
read_ref child = tm_.read_lock(b, validator_);
|
||||
|
||||
// FIXME: is it safe?
|
||||
::memcpy(n.raw(), child.data(), read_ref::BLOCK_SIZE);
|
||||
|
||||
tm_.get_sm()->dec(child.get_location());
|
||||
return true;
|
||||
}
|
||||
|
||||
int i = n.lower_bound(key);
|
||||
if (i < 0)
|
||||
return false;
|
||||
|
||||
bool has_left_sibling = i > 0;
|
||||
bool has_right_sibling = static_cast<unsigned>(i) < (n.get_nr_entries() - 1);
|
||||
|
||||
if (!has_left_sibling)
|
||||
rebalance2<ValueTraits>(spine, i);
|
||||
else if (!has_right_sibling)
|
||||
rebalance2<ValueTraits>(spine, i - 1);
|
||||
else
|
||||
rebalance3<ValueTraits>(spine, i - 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
rebalance2(btree_detail::shadow_spine &spine, unsigned left_index)
|
||||
{
|
||||
internal_node parent = spine.get_node<block_traits>();
|
||||
shadow_child left = create_shadow_child(parent, left_index);
|
||||
shadow_child right = create_shadow_child(parent, left_index + 1);
|
||||
|
||||
// FIXME: ugly
|
||||
if (left.get_type() == btree_detail::INTERNAL) {
|
||||
internal_node l = left.get_node<block_traits>();
|
||||
internal_node r = right.get_node<block_traits>();
|
||||
__rebalance2(parent, l, r, left_index);
|
||||
} else {
|
||||
node_ref<ValueTraits> l = left.get_node<ValueTraits>();
|
||||
node_ref<ValueTraits> r = right.get_node<ValueTraits>();
|
||||
__rebalance2(parent, l, r, left_index);
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
__rebalance2(internal_node &parent,
|
||||
node_ref<ValueTraits> &left,
|
||||
node_ref<ValueTraits> &right,
|
||||
unsigned left_index)
|
||||
{
|
||||
unsigned nr_left = left.get_nr_entries();
|
||||
unsigned nr_right = right.get_nr_entries();
|
||||
unsigned right_index = left_index + 1;
|
||||
|
||||
unsigned threshold = 2 * (left.merge_threshold() + 1);
|
||||
if (nr_left + nr_right < threshold) {
|
||||
// Merge the right child into the left
|
||||
left.copy_entries_to_left(right, nr_right);
|
||||
left.set_nr_entries(nr_left + nr_right);
|
||||
parent.delete_at(right_index);
|
||||
tm_.get_sm()->dec(right.get_location());
|
||||
} else {
|
||||
// Rebalance
|
||||
unsigned target_left = (nr_left + nr_right) / 2;
|
||||
left.move_entries(right, nr_left - target_left);
|
||||
parent.set_key(right_index, right.key_at(0));
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
rebalance3(btree_detail::shadow_spine &spine, unsigned left_index)
|
||||
{
|
||||
internal_node parent = spine.get_node<block_traits>();
|
||||
shadow_child left = create_shadow_child(parent, left_index);
|
||||
shadow_child center = create_shadow_child(parent, left_index + 1);
|
||||
shadow_child right = create_shadow_child(parent, left_index + 2);
|
||||
|
||||
// FIXME: ugly
|
||||
if (left.get_type() == btree_detail::INTERNAL) {
|
||||
internal_node l = left.get_node<block_traits>();
|
||||
internal_node c = center.get_node<block_traits>();
|
||||
internal_node r = right.get_node<block_traits>();
|
||||
__rebalance3(parent, l, c, r, left_index);
|
||||
} else {
|
||||
node_ref<ValueTraits> l = left.get_node<ValueTraits>();
|
||||
node_ref<ValueTraits> c = center.get_node<ValueTraits>();
|
||||
node_ref<ValueTraits> r = right.get_node<ValueTraits>();
|
||||
__rebalance3(parent, l, c, r, left_index);
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
__rebalance3(internal_node &parent,
|
||||
node_ref<ValueTraits> &left,
|
||||
node_ref<ValueTraits> ¢er,
|
||||
node_ref<ValueTraits> &right,
|
||||
unsigned left_index)
|
||||
{
|
||||
unsigned nr_left = left.get_nr_entries();
|
||||
unsigned nr_center = center.get_nr_entries();
|
||||
unsigned nr_right = right.get_nr_entries();
|
||||
|
||||
unsigned threshold = left.merge_threshold() * 4 + 1;
|
||||
|
||||
if ((nr_left + nr_center + nr_right) < threshold)
|
||||
delete_center_node(parent, left, center, right, left_index);
|
||||
else
|
||||
redistribute3(parent, left, center, right, left_index);
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
delete_center_node(internal_node &parent,
|
||||
node_ref<ValueTraits> &left,
|
||||
node_ref<ValueTraits> ¢er,
|
||||
node_ref<ValueTraits> &right,
|
||||
unsigned left_index)
|
||||
{
|
||||
unsigned center_index = left_index + 1;
|
||||
unsigned right_index = left_index + 2;
|
||||
|
||||
unsigned max_entries = left.get_max_entries();
|
||||
unsigned nr_left = left.get_nr_entries();
|
||||
unsigned nr_center = center.get_nr_entries();
|
||||
unsigned nr_right = right.get_nr_entries();
|
||||
unsigned shift = std::min(max_entries - nr_left, nr_center);
|
||||
|
||||
if (nr_left + shift > max_entries)
|
||||
throw std::runtime_error("too many entries");
|
||||
|
||||
left.copy_entries_to_left(center, shift);
|
||||
left.set_nr_entries(nr_left + shift);
|
||||
|
||||
if (shift != nr_center) {
|
||||
shift = nr_center - shift;
|
||||
if ((nr_right + shift) > max_entries)
|
||||
throw std::runtime_error("too many entries");
|
||||
right.shift_entries_right(shift);
|
||||
center.copy_entries_to_right(right, shift);
|
||||
right.set_nr_entries(nr_right + shift);
|
||||
}
|
||||
parent.set_key(right_index, right.key_at(0));
|
||||
|
||||
parent.delete_at(center_index);
|
||||
--right_index;
|
||||
|
||||
tm_.get_sm()->dec(center.get_location());
|
||||
__rebalance2(parent, left, right, left_index);
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
redistribute3(internal_node &parent,
|
||||
node_ref<ValueTraits> &left,
|
||||
node_ref<ValueTraits> ¢er,
|
||||
node_ref<ValueTraits> &right,
|
||||
unsigned left_index)
|
||||
{
|
||||
unsigned center_index = left_index + 1;
|
||||
unsigned right_index = left_index + 2;
|
||||
|
||||
unsigned nr_left = left.get_nr_entries();
|
||||
unsigned nr_center = center.get_nr_entries();
|
||||
unsigned nr_right = right.get_nr_entries();
|
||||
|
||||
unsigned max_entries = left.get_max_entries();
|
||||
unsigned total = nr_left + nr_center + nr_right;
|
||||
unsigned target_right = total / 3;
|
||||
unsigned remainder = (target_right * 3) != total;
|
||||
unsigned target_left = target_right + remainder;
|
||||
|
||||
if (target_left > max_entries || target_right > max_entries)
|
||||
throw std::runtime_error("too many entries");
|
||||
|
||||
if (nr_left < nr_right) {
|
||||
int s = nr_left - target_left;
|
||||
|
||||
// FIXME: signed & unsigned comparison
|
||||
if (s < 0 && nr_center < static_cast<unsigned>(-s)) {
|
||||
// not enough in central node
|
||||
left.move_entries(center, -nr_center);
|
||||
s += nr_center;
|
||||
left.move_entries(right, s);
|
||||
nr_right += s;
|
||||
} else
|
||||
left.move_entries(center, s);
|
||||
|
||||
center.move_entries(right, target_right - nr_right);
|
||||
|
||||
} else {
|
||||
int s = target_right - nr_right;
|
||||
|
||||
if (s > 0 && nr_center < static_cast<unsigned>(s)) {
|
||||
// not enough in central node
|
||||
center.move_entries(right, nr_center);
|
||||
s -= nr_center;
|
||||
left.move_entries(right, s);
|
||||
nr_left -= s;
|
||||
} else
|
||||
center.move_entries(right, s);
|
||||
|
||||
left.move_entries(center, nr_left - target_left);
|
||||
}
|
||||
|
||||
parent.set_key(center_index, center.key_at(0));
|
||||
parent.set_key(right_index, right.key_at(0));
|
||||
}
|
||||
};
|
@ -110,12 +110,34 @@ namespace persistent_data {
|
||||
uint64_t key,
|
||||
typename ValueTraits::value_type const &v);
|
||||
|
||||
// Decrements the nr_entries field
|
||||
void delete_at(unsigned i);
|
||||
|
||||
// Copies entries from another node, appends them
|
||||
// to the back of this node. Adjusts nr_entries.
|
||||
void copy_entries(node_ref const &rhs,
|
||||
unsigned begin,
|
||||
unsigned end);
|
||||
|
||||
// Moves entries between the sibling node,
|
||||
// and maintains the key ordering.
|
||||
// The nr_entries of both nodes is adjusted.
|
||||
void move_entries(node_ref &rhs,
|
||||
int count);
|
||||
|
||||
// Copies entries from the beginning of rhs to the end of lhs,
|
||||
// or copies entries from the end of lhs to the beginning of rhs.
|
||||
// The nr_entries is not adjusted.
|
||||
void copy_entries_to_left(node_ref const &rhs, unsigned count);
|
||||
void copy_entries_to_right(node_ref &rhs, unsigned count) const;
|
||||
|
||||
// Shifts entries to left or right.
|
||||
// The nr_entries is not adjusted.
|
||||
void shift_entries_left(unsigned shift);
|
||||
void shift_entries_right(unsigned shift);
|
||||
|
||||
unsigned merge_threshold() const;
|
||||
|
||||
// Various searches
|
||||
int bsearch(uint64_t key, int want_hi) const;
|
||||
boost::optional<unsigned> exact_search(uint64_t key) const;
|
||||
@ -124,6 +146,9 @@ namespace persistent_data {
|
||||
template <typename RefCounter>
|
||||
void inc_children(RefCounter &rc);
|
||||
|
||||
template <typename RefCounter>
|
||||
void dec_children(RefCounter &rc);
|
||||
|
||||
disk_node *raw() {
|
||||
return raw_;
|
||||
}
|
||||
@ -256,6 +281,26 @@ namespace persistent_data {
|
||||
maybe_block root_;
|
||||
};
|
||||
|
||||
class shadow_child {
|
||||
public:
|
||||
shadow_child(block_manager::write_ref &wr, node_type type)
|
||||
: wr_(wr), type_(type) {
|
||||
}
|
||||
|
||||
node_type get_type() const {
|
||||
return type_;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
node_ref<ValueTraits> get_node() {
|
||||
return to_node<ValueTraits>(wr_);
|
||||
}
|
||||
|
||||
private:
|
||||
block_manager::write_ref wr_;
|
||||
node_type type_;
|
||||
};
|
||||
|
||||
// Used to keep a record of a nested btree's position.
|
||||
typedef std::vector<uint64_t> btree_path;
|
||||
|
||||
@ -396,6 +441,14 @@ namespace persistent_data {
|
||||
int *index,
|
||||
RC &leaf_rc);
|
||||
|
||||
template <typename ValueTraits2, typename RC>
|
||||
bool
|
||||
remove_location(btree_detail::shadow_spine &spine,
|
||||
block_address block,
|
||||
uint64_t key,
|
||||
unsigned *index,
|
||||
RC &leaf_rc);
|
||||
|
||||
void walk_tree(visitor &visitor,
|
||||
btree_detail::node_location const &loc,
|
||||
block_address b) const;
|
||||
@ -408,6 +461,53 @@ namespace persistent_data {
|
||||
void inc_children(btree_detail::shadow_spine &spine,
|
||||
RefCounter &leaf_rc);
|
||||
|
||||
btree_detail::shadow_child
|
||||
create_shadow_child(internal_node &parent,
|
||||
unsigned index);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
bool rebalance_children(btree_detail::shadow_spine &spine,
|
||||
uint64_t key);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void rebalance2(btree_detail::shadow_spine &spine,
|
||||
unsigned left_index);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void rebalance3(btree_detail::shadow_spine &spine,
|
||||
unsigned left_index);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void
|
||||
__rebalance2(internal_node &parent,
|
||||
btree_detail::node_ref<ValueTraits2> &left,
|
||||
btree_detail::node_ref<ValueTraits2> &right,
|
||||
unsigned left_index);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void
|
||||
__rebalance3(internal_node &parent,
|
||||
btree_detail::node_ref<ValueTraits2> &left,
|
||||
btree_detail::node_ref<ValueTraits2> ¢er,
|
||||
btree_detail::node_ref<ValueTraits2> &right,
|
||||
unsigned left_index);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void
|
||||
delete_center_node(internal_node &parent,
|
||||
btree_detail::node_ref<ValueTraits2> &left,
|
||||
btree_detail::node_ref<ValueTraits2> ¢er,
|
||||
btree_detail::node_ref<ValueTraits2> &right,
|
||||
unsigned left_index);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void
|
||||
redistribute3(internal_node &parent,
|
||||
btree_detail::node_ref<ValueTraits2> &left,
|
||||
btree_detail::node_ref<ValueTraits2> ¢er,
|
||||
btree_detail::node_ref<ValueTraits2> &right,
|
||||
unsigned left_index);
|
||||
|
||||
transaction_manager &tm_;
|
||||
bool destroy_;
|
||||
block_address root_;
|
||||
@ -418,6 +518,7 @@ namespace persistent_data {
|
||||
};
|
||||
|
||||
#include "btree.tcc"
|
||||
#include "btree-remove.tcc"
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stack>
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
@ -33,6 +34,56 @@ namespace {
|
||||
using namespace persistent_data;
|
||||
using namespace btree_detail;
|
||||
using namespace std;
|
||||
|
||||
struct frame {
|
||||
frame(block_address blocknr,
|
||||
uint32_t level,
|
||||
uint32_t nr_entries)
|
||||
: blocknr_(blocknr),
|
||||
level_(level),
|
||||
nr_entries_(nr_entries),
|
||||
current_child_(0) {
|
||||
}
|
||||
block_address blocknr_;
|
||||
uint32_t level_;
|
||||
uint32_t nr_entries_;
|
||||
uint32_t current_child_;
|
||||
};
|
||||
|
||||
// stack for postorder DFS traversal
|
||||
// TODO: Refactor it into a spine-like class, e.g., btree_del_spine,
|
||||
// "Spine" sounds better for btree operations.
|
||||
struct btree_del_stack {
|
||||
public:
|
||||
btree_del_stack(transaction_manager &tm): tm_(tm) {
|
||||
}
|
||||
|
||||
void push_frame(block_address blocknr,
|
||||
uint32_t level,
|
||||
uint32_t nr_entries) {
|
||||
if (tm_.get_sm()->get_count(blocknr) > 1)
|
||||
tm_.get_sm()->dec(blocknr);
|
||||
else
|
||||
spine_.push(frame(blocknr, level, nr_entries));
|
||||
}
|
||||
|
||||
void pop_frame() {
|
||||
tm_.get_sm()->dec(spine_.top().blocknr_);
|
||||
spine_.pop();
|
||||
}
|
||||
|
||||
frame &top_frame() {
|
||||
return spine_.top();
|
||||
}
|
||||
|
||||
bool is_empty() {
|
||||
return spine_.empty();
|
||||
}
|
||||
|
||||
private:
|
||||
transaction_manager &tm_;
|
||||
std::stack<frame> spine_;
|
||||
};
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
@ -242,6 +293,23 @@ namespace persistent_data {
|
||||
set_value(i, v);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::delete_at(unsigned i)
|
||||
{
|
||||
unsigned nr_entries = get_nr_entries();
|
||||
if (i >= nr_entries)
|
||||
throw runtime_error("key index out of bounds");
|
||||
unsigned nr_to_copy = nr_entries - (i + 1);
|
||||
|
||||
if (nr_to_copy) {
|
||||
::memmove(key_ptr(i), key_ptr(i + 1), sizeof(uint64_t) * nr_to_copy);
|
||||
::memmove(value_ptr(i), value_ptr(i + 1), sizeof(typename ValueTraits::disk_type) * nr_to_copy);
|
||||
}
|
||||
|
||||
set_nr_entries(nr_entries - 1);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::copy_entries(node_ref const &rhs,
|
||||
@ -258,6 +326,90 @@ namespace persistent_data {
|
||||
set_nr_entries(n + count);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::move_entries(node_ref<ValueTraits> &rhs,
|
||||
int count)
|
||||
{
|
||||
if (!count)
|
||||
return;
|
||||
|
||||
unsigned nr_left = get_nr_entries();
|
||||
unsigned nr_right = rhs.get_nr_entries();
|
||||
unsigned max_entries = get_max_entries();
|
||||
|
||||
if (nr_left - count > max_entries || nr_right - count > max_entries)
|
||||
throw runtime_error("too many entries");
|
||||
|
||||
if (count > 0) {
|
||||
rhs.shift_entries_right(count);
|
||||
copy_entries_to_right(rhs, count);
|
||||
} else {
|
||||
copy_entries_to_left(rhs, -count);
|
||||
rhs.shift_entries_left(-count);
|
||||
}
|
||||
|
||||
set_nr_entries(nr_left - count);
|
||||
rhs.set_nr_entries(nr_right + count);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::copy_entries_to_left(node_ref const &rhs, unsigned count)
|
||||
{
|
||||
unsigned n = get_nr_entries();
|
||||
if ((n + count) > get_max_entries())
|
||||
throw runtime_error("too many entries");
|
||||
|
||||
::memcpy(key_ptr(n), rhs.key_ptr(0), sizeof(uint64_t) * count);
|
||||
::memcpy(value_ptr(n), rhs.value_ptr(0), sizeof(typename ValueTraits::disk_type) * count);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::copy_entries_to_right(node_ref &rhs, unsigned count) const
|
||||
{
|
||||
unsigned n = rhs.get_nr_entries();
|
||||
if ((n + count) > get_max_entries())
|
||||
throw runtime_error("too many entries");
|
||||
|
||||
unsigned nr_left = get_nr_entries();
|
||||
::memcpy(rhs.key_ptr(0), key_ptr(nr_left - count), sizeof(uint64_t) * count);
|
||||
::memcpy(rhs.value_ptr(0), value_ptr(nr_left - count), sizeof(typename ValueTraits::disk_type) * count);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::shift_entries_left(unsigned shift)
|
||||
{
|
||||
unsigned n = get_nr_entries();
|
||||
if (shift > n)
|
||||
throw runtime_error("too many entries");
|
||||
|
||||
unsigned nr_shifted = n - shift;
|
||||
::memmove(key_ptr(0), key_ptr(shift), sizeof(uint64_t) * nr_shifted);
|
||||
::memmove(value_ptr(0), value_ptr(shift), sizeof(typename ValueTraits::disk_type) * nr_shifted);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::shift_entries_right(unsigned shift)
|
||||
{
|
||||
unsigned n = get_nr_entries();
|
||||
if (n + shift > get_max_entries())
|
||||
throw runtime_error("too many entries");
|
||||
|
||||
::memmove(key_ptr(shift), key_ptr(0), sizeof(uint64_t) * n);
|
||||
::memmove(value_ptr(shift), value_ptr(0), sizeof(typename ValueTraits::disk_type) * n);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
unsigned
|
||||
node_ref<ValueTraits>::merge_threshold() const
|
||||
{
|
||||
return get_max_entries() / 3;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
int
|
||||
node_ref<ValueTraits>::bsearch(uint64_t key, int want_hi) const
|
||||
@ -348,6 +500,21 @@ namespace persistent_data {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
template <typename RefCounter>
|
||||
void
|
||||
node_ref<ValueTraits>::dec_children(RefCounter &rc)
|
||||
{
|
||||
unsigned nr_entries = get_nr_entries();
|
||||
for (unsigned i = 0; i < nr_entries; i++) {
|
||||
typename ValueTraits::value_type v;
|
||||
typename ValueTraits::disk_type d;
|
||||
::memcpy(&d, value_ptr(i), sizeof(d));
|
||||
ValueTraits::unpack(d, v);
|
||||
rc.dec(v);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
bool
|
||||
node_ref<ValueTraits>::value_sizes_match() const {
|
||||
@ -535,13 +702,6 @@ namespace persistent_data {
|
||||
return need_insert;
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::remove(key const &key)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
block_address
|
||||
btree<Levels, ValueTraits>::get_root() const
|
||||
@ -565,15 +725,57 @@ namespace persistent_data {
|
||||
return ptr(new btree<Levels, ValueTraits>(tm_, root_, rc_));
|
||||
}
|
||||
|
||||
#if 0
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::destroy()
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
btree_del_stack s(tm_);
|
||||
|
||||
{
|
||||
read_ref blk = tm_.read_lock(root_, validator_);
|
||||
internal_node n = to_node<block_traits>(blk);
|
||||
s.push_frame(root_, 0, n.get_nr_entries());
|
||||
}
|
||||
|
||||
while (!s.is_empty()) {
|
||||
frame &f = s.top_frame();
|
||||
|
||||
if (f.current_child_ >= f.nr_entries_) {
|
||||
s.pop_frame();
|
||||
continue;
|
||||
}
|
||||
|
||||
// FIXME: Cache the read_ref object in the stack to avoid temporary objects?
|
||||
read_ref current = tm_.read_lock(f.blocknr_, validator_);
|
||||
internal_node n = to_node<block_traits>(current);
|
||||
|
||||
if (n.get_type() == INTERNAL) {
|
||||
// TODO: test performance penalty of prefetching
|
||||
//if (!f.current_child_)
|
||||
// for (unsigned i = 0; i < n.get_nr_entries(); i++)
|
||||
// tm_.prefetch(n.value_at(i));
|
||||
|
||||
block_address b = n.value_at(f.current_child_);
|
||||
read_ref leaf = tm_.read_lock(b, validator_);
|
||||
internal_node o = to_node<block_traits>(leaf);
|
||||
s.push_frame(b, f.level_, o.get_nr_entries());
|
||||
++f.current_child_;
|
||||
// internal leaf
|
||||
} else if (f.level_ < Levels - 1) {
|
||||
block_address b = n.value_at(f.current_child_);
|
||||
read_ref leaf = tm_.read_lock(b, validator_);
|
||||
internal_node o = to_node<block_traits>(leaf);
|
||||
s.push_frame(b, f.level_ + 1, o.get_nr_entries());
|
||||
++f.current_child_;
|
||||
} else {
|
||||
leaf_node o = to_node<ValueTraits>(current);
|
||||
o.dec_children(rc_); // FIXME: move this into pop_frame()
|
||||
s.pop_frame();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits, typename Search>
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include "persistent-data/math_utils.h"
|
||||
#include "base/math_utils.h"
|
||||
#include "persistent-data/file_utils.h"
|
||||
#include "persistent-data/space-maps/core.h"
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "persistent-data/space-maps/core.h"
|
||||
#include "persistent-data/math_utils.h"
|
||||
#include "base/math_utils.h"
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include "persistent-data/data-structures/btree_damage_visitor.h"
|
||||
#include "persistent-data/data-structures/btree_counter.h"
|
||||
#include "persistent-data/checksum.h"
|
||||
#include "persistent-data/math_utils.h"
|
||||
#include "base/math_utils.h"
|
||||
#include "persistent-data/transaction_manager.h"
|
||||
|
||||
using namespace persistent_data;
|
||||
|
@ -23,6 +23,7 @@ thin_provisioning::register_thin_commands(base::application &app)
|
||||
app.add_cmd(command::ptr(new thin_ll_restore_cmd()));
|
||||
app.add_cmd(command::ptr(new thin_scan_cmd()));
|
||||
app.add_cmd(command::ptr(new thin_generate_metadata_cmd()));
|
||||
app.add_cmd(command::ptr(new thin_generate_mappings_cmd()));
|
||||
app.add_cmd(command::ptr(new thin_show_duplicates_cmd()));
|
||||
app.add_cmd(command::ptr(new thin_show_metadata_cmd()));
|
||||
app.add_cmd(command::ptr(new thin_journal_cmd()));
|
||||
|
@ -110,6 +110,13 @@ namespace thin_provisioning {
|
||||
virtual int run(int argc, char **argv);
|
||||
};
|
||||
|
||||
class thin_generate_mappings_cmd : public base::command {
|
||||
public:
|
||||
thin_generate_mappings_cmd();
|
||||
virtual void usage(std::ostream &out) const;
|
||||
virtual int run(int argc, char **argv);
|
||||
};
|
||||
|
||||
class thin_show_metadata_cmd : public base::command {
|
||||
public:
|
||||
thin_show_metadata_cmd();
|
||||
|
@ -54,6 +54,13 @@ namespace thin_provisioning {
|
||||
snapshotted_time_(0) {
|
||||
}
|
||||
|
||||
// Details of a device with no mapped blocks: `tid` becomes the
// transaction id, `time` both the creation and the snapshotted time.
device_details::device_details(uint64_t tid, uint32_t time)
	: mapped_blocks_(0),
	  transaction_id_(tid),
	  creation_time_(time),
	  snapshotted_time_(time)
{
}
|
||||
|
||||
void
|
||||
device_details_traits::unpack(device_details_disk const &disk, device_details &value)
|
||||
{
|
||||
|
@ -17,6 +17,7 @@ namespace thin_provisioning {
|
||||
|
||||
struct device_details {
|
||||
device_details();
|
||||
device_details(uint64_t tid, uint32_t time);
|
||||
|
||||
uint64_t mapped_blocks_;
|
||||
uint64_t transaction_id_; /* when created */
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include "thin-provisioning/metadata.h"
|
||||
|
||||
#include "persistent-data/file_utils.h"
|
||||
#include "persistent-data/math_utils.h"
|
||||
#include "base/math_utils.h"
|
||||
#include "persistent-data/space-maps/core.h"
|
||||
#include "persistent-data/space-maps/disk.h"
|
||||
|
||||
|
@ -37,7 +37,6 @@ namespace thin_provisioning {
|
||||
using namespace base;
|
||||
using namespace persistent_data;
|
||||
|
||||
typedef uint64_t sector_t;
|
||||
typedef uint32_t thin_dev_t;
|
||||
|
||||
//------------------------------------------------
|
||||
|
207
thin-provisioning/thin_generate_mappings.cc
Normal file
207
thin-provisioning/thin_generate_mappings.cc
Normal file
@ -0,0 +1,207 @@
|
||||
// This file is part of the thin-provisioning-tools source.
|
||||
//
|
||||
// thin-provisioning-tools is free software: you can redistribute it
|
||||
// and/or modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation, either version 3 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// thin-provisioning-tools is distributed in the hope that it will be
|
||||
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "base/io_generator.h"
|
||||
#include "base/output_file_requirements.h"
|
||||
#include "persistent-data/file_utils.h"
|
||||
#include "thin-provisioning/commands.h"
|
||||
#include "thin-provisioning/thin_pool.h"
|
||||
#include "version.h"
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
#include <getopt.h>
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace boost;
|
||||
using namespace thin_provisioning;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
struct flags {
|
||||
flags()
|
||||
: pattern("write"),
|
||||
offset(0)
|
||||
{
|
||||
}
|
||||
|
||||
bool check_conformance();
|
||||
|
||||
boost::optional<string> output;
|
||||
base::io_pattern pattern;
|
||||
boost::optional<unsigned> dev_id;
|
||||
boost::optional<base::sector_t> block_size;
|
||||
base::sector_t offset;
|
||||
boost::optional<base::sector_t> size;
|
||||
boost::optional<base::sector_t> io_size;
|
||||
};
|
||||
|
||||
bool flags::check_conformance() {
|
||||
if (!output) {
|
||||
cerr << "No output file provided." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!dev_id) {
|
||||
cerr << "No device id provided." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
cerr << "No device size specified" << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
check_output_file_requirements(*output);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//--------------------------------
|
||||
|
||||
// Opens the file named by fs.output read-write and wraps it in a thin_pool.
thin_pool::ptr open_pool(flags const &fs) {
	return thin_pool::ptr(
		new thin_pool(open_bm(*fs.output, block_manager::READ_WRITE)));
}
|
||||
|
||||
// Opens the pool and the thin device selected by `fs`, replays the
// generated io sequence against it and commits the pool.  Returns 0.
int generate_mappings(flags const &fs) {
	thin_pool::ptr pool = open_pool(fs);
	thin::ptr td = pool->open_thin(*fs.dev_id);

	io_generator_options opts;
	opts.pattern_ = fs.pattern;

	// Without an explicit block size, fall back to the pool's data block size.
	if (fs.block_size)
		opts.block_size_ = *fs.block_size;
	else
		opts.block_size_ = pool->get_data_block_size();

	opts.offset_ = fs.offset;
	opts.size_ = *fs.size;

	// Without an explicit io size, use the size value.
	opts.io_size_ = fs.io_size ? *fs.io_size : *fs.size;

	io_generator::ptr gen = create_io_generator(opts);

	base::io io;
	for (; gen->has_next(); ) {
		// TODO: support io.size_
		gen->next(io);

		if (io.op_ == base::REQ_OP_READ)
			process_read(td, pool, io.sector_);
		else if (io.op_ == base::REQ_OP_WRITE)
			process_write(td, pool, io.sector_);
		else if (io.op_ == base::REQ_OP_DISCARD)
			process_discard(td, pool, io.sector_);
	}

	pool->commit();

	return 0;
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// Registers the command under the name "thin_generate_mappings".
thin_generate_mappings_cmd::thin_generate_mappings_cmd()
	: command("thin_generate_mappings")
{
}
|
||||
|
||||
void
|
||||
thin_generate_mappings_cmd::usage(std::ostream &out) const
|
||||
{
|
||||
out << "Usage: " << get_name() << " [options]\n"
|
||||
<< "Options:\n"
|
||||
<< " {-h|--help}\n"
|
||||
<< " {-o|--output} <output device or file>\n"
|
||||
<< " {--dev-id} <dev-id>\n"
|
||||
<< " {--offset} <offset>\n"
|
||||
<< " {--io-size} <io_size>\n"
|
||||
<< " {--rw write|trim|randwrite|randtrim|randtw}\n"
|
||||
<< " {--size} <size>\n"
|
||||
<< " {-V|--version}" << endl;
|
||||
}
|
||||
|
||||
int
|
||||
thin_generate_mappings_cmd::run(int argc, char **argv)
|
||||
{
|
||||
int c;
|
||||
struct flags fs;
|
||||
const char *shortopts = "hi:o:qV";
|
||||
const struct option longopts[] = {
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "output", required_argument, NULL, 'o' },
|
||||
{ "dev-id", required_argument, NULL, 1 },
|
||||
{ "rw", required_argument, NULL, 2 },
|
||||
{ "offset", required_argument, NULL, 3 },
|
||||
{ "size", required_argument, NULL, 4 },
|
||||
{ "io-size", required_argument, NULL, 5 },
|
||||
{ "version", no_argument, NULL, 'V' },
|
||||
{ NULL, no_argument, NULL, 0 }
|
||||
};
|
||||
|
||||
while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
|
||||
switch(c) {
|
||||
case 'h':
|
||||
usage(cout);
|
||||
return 0;
|
||||
|
||||
case 'o':
|
||||
fs.output = optarg;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
fs.dev_id = parse_uint64(optarg, "dev_id");
|
||||
break;
|
||||
|
||||
case 2:
|
||||
fs.pattern.parse(optarg);
|
||||
break;
|
||||
|
||||
case 3:
|
||||
fs.offset = parse_uint64(optarg, "offset");
|
||||
break;
|
||||
|
||||
case 4:
|
||||
fs.size = parse_uint64(optarg, "size");
|
||||
break;
|
||||
|
||||
case 5:
|
||||
fs.io_size = parse_uint64(optarg, "io_size");
|
||||
break;
|
||||
|
||||
case 'V':
|
||||
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
usage(cerr);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!fs.check_conformance()) {
|
||||
usage(cerr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return generate_mappings(fs);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
@ -19,7 +19,7 @@
|
||||
#include "base/output_file_requirements.h"
|
||||
#include "persistent-data/file_utils.h"
|
||||
#include "thin-provisioning/commands.h"
|
||||
#include "thin-provisioning/metadata.h"
|
||||
#include "thin-provisioning/thin_pool.h"
|
||||
#include "version.h"
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
@ -27,8 +27,6 @@
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace boost;
|
||||
using namespace persistent_data;
|
||||
using namespace std;
|
||||
using namespace thin_provisioning;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
@ -40,6 +38,11 @@ namespace {
|
||||
METADATA_OP_FORMAT,
|
||||
METADATA_OP_OPEN,
|
||||
METADATA_OP_CREATE_THIN,
|
||||
METADATA_OP_CREATE_SNAP,
|
||||
METADATA_OP_DELETE_DEV,
|
||||
METADATA_OP_SET_TRANSACTION_ID,
|
||||
METADATA_OP_RESERVE_METADATA_SNAP,
|
||||
METADATA_OP_RELEASE_METADATA_SNAP,
|
||||
METADATA_OP_LAST
|
||||
};
|
||||
|
||||
@ -55,7 +58,9 @@ namespace {
|
||||
metadata_operations op;
|
||||
sector_t data_block_size;
|
||||
block_address nr_data_blocks;
|
||||
optional<uint64_t> dev_id;
|
||||
optional<thin_dev_t> dev_id;
|
||||
optional<thin_dev_t> origin;
|
||||
optional<uint64_t> trans_id;
|
||||
optional<string> output;
|
||||
};
|
||||
|
||||
@ -77,88 +82,63 @@ namespace {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//--------------------------------
|
||||
|
||||
// Constructs a single_mapping_tree on md's transaction manager whose
// reference counter is built from md's data space map.
single_mapping_tree::ptr new_mapping_tree(metadata::ptr md) {
	single_mapping_tree::ptr tree(
		new single_mapping_tree(
			*md->tm_,
			mapping_tree_detail::block_time_ref_counter(md->data_sm_)));
	return tree;
}
|
||||
|
||||
bool is_device_exists(metadata::ptr md, uint64_t dev_id) {
|
||||
uint64_t key[1] = {dev_id};
|
||||
|
||||
device_tree::maybe_value v1 = md->details_->lookup(key);
|
||||
if (v1)
|
||||
return true;
|
||||
|
||||
dev_tree::maybe_value v2 = md->mappings_top_level_->lookup(key);
|
||||
if (v2)
|
||||
return true;
|
||||
|
||||
if (op == METADATA_OP_CREATE_SNAP && (!dev_id || !origin)) {
|
||||
cerr << "no device id provided." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (op == METADATA_OP_DELETE_DEV && !dev_id) {
|
||||
cerr << "no device id provided." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (op == METADATA_OP_SET_TRANSACTION_ID && !trans_id) {
|
||||
cerr << "no transaction id provided." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//--------------------------------
|
||||
|
||||
// Constructs metadata in CREATE mode on `bm` with the given data block
// size and data block count, commits it and returns it.
metadata::ptr format_metadata(block_manager::ptr bm,
			      sector_t data_block_size,
			      block_address nr_data_blocks) {
	metadata::ptr fresh(
		new metadata(bm, metadata::CREATE, data_block_size, nr_data_blocks));

	fresh->commit();
	return fresh;
}
|
||||
|
||||
// Wraps `bm` in a metadata object built with the (bm, true) constructor.
metadata::ptr open_metadata(block_manager::ptr bm) {
	return metadata::ptr(new metadata(bm, true));
}
|
||||
|
||||
// Adds device `dev_id` to `md`: inserts a details record stamped with
// the superblock's transaction id and time, inserts a new mapping
// subtree for the device, then commits.
// Throws runtime_error when the device already exists.
void create_thin(metadata::ptr md, uint64_t dev_id) {
	uint64_t key[1] = {dev_id};

	if (is_device_exists(md, dev_id))
		throw runtime_error("device already exists");

	// Details record: creation and snapshot time start out equal.
	device_tree_detail::device_details info;
	info.transaction_id_ = md->sb_.trans_id_;
	info.creation_time_ = md->sb_.time_;
	info.snapshotted_time_ = info.creation_time_;
	md->details_->insert(key, info);

	// Mapping subtree for the new device.
	single_mapping_tree::ptr dev_tree_root = new_mapping_tree(md);
	md->mappings_top_level_->insert(key, dev_tree_root->get_root());
	md->mappings_->set_root(md->mappings_top_level_->get_root()); // FIXME: ugly

	md->commit();
}
|
||||
|
||||
metadata::ptr open_or_format_metadata(block_manager::ptr bm, flags const &fs) {
|
||||
thin_pool::ptr open_or_create_pool(flags const &fs) {
|
||||
block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE);
|
||||
|
||||
if (fs.op == flags::METADATA_OP_FORMAT)
|
||||
return format_metadata(bm, fs.data_block_size, fs.nr_data_blocks);
|
||||
return thin_pool::ptr(new thin_pool(bm, fs.data_block_size, fs.nr_data_blocks));
|
||||
else
|
||||
return open_metadata(bm);
|
||||
return thin_pool::ptr(new thin_pool(bm));
|
||||
}
|
||||
|
||||
int generate_metadata(flags const &fs) {
|
||||
block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE);
|
||||
metadata::ptr md = open_or_format_metadata(bm, fs);
|
||||
thin_pool::ptr pool = open_or_create_pool(fs);
|
||||
|
||||
switch (fs.op) {
|
||||
case flags::METADATA_OP_CREATE_THIN:
|
||||
create_thin(md, *fs.dev_id);
|
||||
pool->create_thin(*fs.dev_id);
|
||||
break;
|
||||
case flags::METADATA_OP_CREATE_SNAP:
|
||||
pool->create_snap(*fs.dev_id, *fs.origin);
|
||||
break;
|
||||
case flags::METADATA_OP_DELETE_DEV:
|
||||
pool->del(*fs.dev_id);
|
||||
break;
|
||||
case flags::METADATA_OP_SET_TRANSACTION_ID:
|
||||
pool->set_transaction_id(*fs.trans_id);
|
||||
break;
|
||||
case flags::METADATA_OP_RESERVE_METADATA_SNAP:
|
||||
pool->reserve_metadata_snap();
|
||||
break;
|
||||
case flags::METADATA_OP_RELEASE_METADATA_SNAP:
|
||||
pool->release_metadata_snap();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
pool->commit();
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -176,8 +156,16 @@ thin_generate_metadata_cmd::usage(std::ostream &out) const
|
||||
out << "Usage: " << get_name() << " [options]\n"
|
||||
<< "Options:\n"
|
||||
<< " {-h|--help}\n"
|
||||
<< " --data-block-size <block size>\n"
|
||||
<< " --nr-data-blocks <nr>\n"
|
||||
<< " {--format}\n"
|
||||
<< " {--create-thin} <dev-id>\n"
|
||||
<< " {--create-snap} <dev-id>\n"
|
||||
<< " {--delete} <dev-id>\n"
|
||||
<< " {--reserve-metadata-snap}\n"
|
||||
<< " {--release-metadata-snap}\n"
|
||||
<< " {--set-transaction-id} <tid>\n"
|
||||
<< " {--data-block-size} <block size>\n"
|
||||
<< " {--nr-data-blocks} <nr>\n"
|
||||
<< " {--origin} <origin-id>\n"
|
||||
<< " {-o|--output} <output device or file>\n"
|
||||
<< " {-V|--version}" << endl;
|
||||
}
|
||||
@ -193,10 +181,15 @@ thin_generate_metadata_cmd::run(int argc, char **argv)
|
||||
{ "output", required_argument, NULL, 'o' },
|
||||
{ "format", no_argument, NULL, 1 },
|
||||
{ "open", no_argument, NULL, 2 },
|
||||
{ "create-thin", no_argument, NULL, 3 },
|
||||
{ "create-thin", required_argument, NULL, 3 },
|
||||
{ "create-snap", required_argument, NULL, 4 },
|
||||
{ "delete", required_argument, NULL, 5 },
|
||||
{ "set-transaction-id", required_argument, NULL, 6 },
|
||||
{ "reserve-metadata-snap", no_argument, NULL, 7 },
|
||||
{ "release-metadata-snap", no_argument, NULL, 8 },
|
||||
{ "data-block-size", required_argument, NULL, 101 },
|
||||
{ "nr-data-blocks", required_argument, NULL, 102 },
|
||||
{ "dev-id", required_argument, NULL, 301 },
|
||||
{ "origin", required_argument, NULL, 401 },
|
||||
{ "version", no_argument, NULL, 'V' },
|
||||
{ NULL, no_argument, NULL, 0 }
|
||||
};
|
||||
@ -221,6 +214,30 @@ thin_generate_metadata_cmd::run(int argc, char **argv)
|
||||
|
||||
case 3:
|
||||
fs.op = flags::METADATA_OP_CREATE_THIN;
|
||||
fs.dev_id = parse_uint64(optarg, "device id");
|
||||
break;
|
||||
|
||||
case 4:
|
||||
fs.op = flags::METADATA_OP_CREATE_SNAP;
|
||||
fs.dev_id = parse_uint64(optarg, "device id");
|
||||
break;
|
||||
|
||||
case 5:
|
||||
fs.op = flags::METADATA_OP_DELETE_DEV;
|
||||
fs.dev_id = parse_uint64(optarg, "device id");
|
||||
break;
|
||||
|
||||
case 6:
|
||||
fs.op = flags::METADATA_OP_SET_TRANSACTION_ID;
|
||||
fs.trans_id = parse_uint64(optarg, "transaction id");
|
||||
break;
|
||||
|
||||
case 7:
|
||||
fs.op = flags::METADATA_OP_RESERVE_METADATA_SNAP;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
fs.op = flags::METADATA_OP_RELEASE_METADATA_SNAP;
|
||||
break;
|
||||
|
||||
case 101:
|
||||
@ -231,8 +248,8 @@ thin_generate_metadata_cmd::run(int argc, char **argv)
|
||||
fs.nr_data_blocks = parse_uint64(optarg, "nr data blocks");
|
||||
break;
|
||||
|
||||
case 301:
|
||||
fs.dev_id = parse_uint64(optarg, "dev id");
|
||||
case 401:
|
||||
fs.origin = parse_uint64(optarg, "origin");
|
||||
break;
|
||||
|
||||
case 'V':
|
||||
|
@ -16,13 +16,10 @@
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "base/math_utils.h"
|
||||
#include "thin-provisioning/thin_pool.h"
|
||||
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <map>
|
||||
|
||||
using namespace base;
|
||||
using namespace std;
|
||||
@ -31,9 +28,22 @@ using namespace thin_provisioning;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
thin::thin(thin_dev_t dev, thin_pool *pool)
|
||||
thin::thin(thin_dev_t dev, thin_pool &pool)
|
||||
: dev_(dev),
|
||||
pool_(pool)
|
||||
pool_(pool),
|
||||
details_(pool.get_transaction_id(), pool.get_time()),
|
||||
open_count_(1),
|
||||
changed_(true)
|
||||
{
|
||||
}
|
||||
|
||||
// Builds a thin device handle from an existing details record.  The
// handle starts with an open count of one and is marked unchanged.
thin::thin(thin_dev_t dev, thin_pool &pool,
	   device_tree_detail::device_details const &details)
	: dev_(dev),
	  pool_(pool),
	  details_(details),
	  open_count_(1),
	  changed_(false)
{
}
|
||||
|
||||
@ -47,66 +57,76 @@ thin::maybe_address
|
||||
thin::lookup(block_address thin_block)
|
||||
{
|
||||
uint64_t key[2] = {dev_, thin_block};
|
||||
return pool_->md_->mappings_->lookup(key);
|
||||
mapping_tree::maybe_value m = pool_.md_->mappings_->lookup(key);
|
||||
if (!m)
|
||||
return thin::maybe_address();
|
||||
|
||||
lookup_result r;
|
||||
r.block_ = m->block_;
|
||||
r.shared_ = m->time_ < details_.snapshotted_time_;
|
||||
return r;
|
||||
}
|
||||
|
||||
bool
|
||||
thin::insert(block_address thin_block, block_address data_block)
|
||||
{
|
||||
uint64_t key[2] = {dev_, thin_block};
|
||||
|
||||
++details_.mapped_blocks_;
|
||||
changed_ = true;
|
||||
|
||||
mapping_tree_detail::block_time bt;
|
||||
bt.block_ = data_block;
|
||||
bt.time_ = 0; // FIXME: use current time.
|
||||
return pool_->md_->mappings_->insert(key, bt);
|
||||
bt.time_ = pool_.get_time();
|
||||
return pool_.md_->mappings_->insert(key, bt);
|
||||
}
|
||||
|
||||
void
|
||||
thin::remove(block_address thin_block)
|
||||
{
|
||||
uint64_t key[2] = {dev_, thin_block};
|
||||
pool_->md_->mappings_->remove(key);
|
||||
pool_.md_->mappings_->remove(key);
|
||||
|
||||
--details_.mapped_blocks_;
|
||||
changed_ = true;
|
||||
}
|
||||
|
||||
void
|
||||
thin::set_snapshot_time(uint32_t time)
|
||||
{
|
||||
uint64_t key[1] = { dev_ };
|
||||
boost::optional<device_tree_detail::device_details> mdetail = pool_->md_->details_->lookup(key);
|
||||
if (!mdetail)
|
||||
throw runtime_error("no such device");
|
||||
|
||||
mdetail->snapshotted_time_ = time;
|
||||
pool_->md_->details_->insert(key, *mdetail);
|
||||
details_.snapshotted_time_ = time;
|
||||
changed_ = true;
|
||||
}
|
||||
|
||||
block_address
|
||||
thin::get_mapped_blocks() const
|
||||
{
|
||||
uint64_t key[1] = { dev_ };
|
||||
boost::optional<device_tree_detail::device_details> mdetail = pool_->md_->details_->lookup(key);
|
||||
if (!mdetail)
|
||||
throw runtime_error("no such device");
|
||||
|
||||
return mdetail->mapped_blocks_;
|
||||
return details_.mapped_blocks_;
|
||||
}
|
||||
|
||||
void
|
||||
thin::set_mapped_blocks(block_address count)
|
||||
{
|
||||
uint64_t key[1] = { dev_ };
|
||||
boost::optional<device_tree_detail::device_details> mdetail = pool_->md_->details_->lookup(key);
|
||||
if (!mdetail)
|
||||
throw runtime_error("no such device");
|
||||
|
||||
mdetail->mapped_blocks_ = count;
|
||||
pool_->md_->details_->insert(key, *mdetail);
|
||||
details_.mapped_blocks_ = count;
|
||||
changed_ = true;
|
||||
}
|
||||
|
||||
//--------------------------------
|
||||
|
||||
thin_pool::thin_pool(metadata::ptr md)
|
||||
: md_(md)
|
||||
thin_pool::thin_pool(block_manager::ptr bm)
|
||||
{
|
||||
md_ = metadata::ptr(new metadata(bm, true));
|
||||
}
|
||||
|
||||
// Constructs metadata in CREATE mode on `bm` with the given data block
// size and data block count, and commits it straight away.
thin_pool::thin_pool(block_manager::ptr bm,
		     sector_t data_block_size,
		     block_address nr_data_blocks)
{
	md_ = metadata::ptr(
		new metadata(bm, metadata::CREATE, data_block_size, nr_data_blocks));

	md_->commit();
}
|
||||
|
||||
thin_pool::~thin_pool()
|
||||
@ -120,14 +140,15 @@ thin_pool::create_thin(thin_dev_t dev)
|
||||
uint64_t key[1] = {dev};
|
||||
|
||||
if (device_exists(dev))
|
||||
throw std::runtime_error("Device already exists");
|
||||
throw std::runtime_error("device already exists");
|
||||
|
||||
single_mapping_tree::ptr new_tree(new single_mapping_tree(*md_->tm_,
|
||||
mapping_tree_detail::block_time_ref_counter(md_->data_sm_)));
|
||||
md_->mappings_top_level_->insert(key, new_tree->get_root());
|
||||
md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly
|
||||
|
||||
// FIXME: doesn't set up the device details
|
||||
thin::ptr r = create_device(dev);
|
||||
close_device(r);
|
||||
}
|
||||
|
||||
void
|
||||
@ -136,31 +157,62 @@ thin_pool::create_snap(thin_dev_t dev, thin_dev_t origin)
|
||||
uint64_t snap_key[1] = {dev};
|
||||
uint64_t origin_key[1] = {origin};
|
||||
|
||||
boost::optional<uint64_t> mtree_root = md_->mappings_top_level_->lookup(origin_key);
|
||||
if (device_exists(dev))
|
||||
throw std::runtime_error("device already exists");
|
||||
|
||||
// find the mapping tree of the origin
|
||||
dev_tree::maybe_value mtree_root = md_->mappings_top_level_->lookup(origin_key);
|
||||
if (!mtree_root)
|
||||
throw std::runtime_error("unknown origin");
|
||||
|
||||
single_mapping_tree otree(*md_->tm_, *mtree_root,
|
||||
mapping_tree_detail::block_time_ref_counter(md_->data_sm_));
|
||||
|
||||
// clone the origin
|
||||
single_mapping_tree::ptr clone(otree.clone());
|
||||
md_->mappings_top_level_->insert(snap_key, clone->get_root());
|
||||
md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly
|
||||
|
||||
md_->sb_.time_++;
|
||||
|
||||
thin::ptr o = open_thin(origin);
|
||||
thin::ptr s = open_thin(dev);
|
||||
o->set_snapshot_time(md_->sb_.time_);
|
||||
s->set_snapshot_time(md_->sb_.time_);
|
||||
s->set_mapped_blocks(o->get_mapped_blocks());
|
||||
// create details for the snapshot
|
||||
thin::ptr s = create_device(dev);
|
||||
set_snapshot_details(s, origin);
|
||||
close_device(s);
|
||||
}
|
||||
|
||||
void
|
||||
thin_pool::del(thin_dev_t dev)
|
||||
{
|
||||
uint64_t key[1] = {dev};
|
||||
|
||||
thin::ptr td = open_device(dev);
|
||||
if (td->open_count_ > 1) {
|
||||
close_device(td);
|
||||
throw std::runtime_error("device busy");
|
||||
}
|
||||
|
||||
thin_devices_.erase(dev);
|
||||
|
||||
dev_tree::maybe_value mtree_root = md_->mappings_top_level_->lookup(key);
|
||||
if (!device_exists(dev) || !mtree_root)
|
||||
throw std::runtime_error("unknown device");
|
||||
|
||||
// TODO: trigger subtree deletion from the mtree_ref_counter,
|
||||
// like the kenrel subtree_dec() does.
|
||||
single_mapping_tree mtree(*md_->tm_, *mtree_root,
|
||||
mapping_tree_detail::block_time_ref_counter(md_->data_sm_));
|
||||
mtree.destroy();
|
||||
|
||||
md_->details_->remove(key);
|
||||
md_->mappings_top_level_->remove(key);
|
||||
md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly
|
||||
}
|
||||
|
||||
void
|
||||
thin_pool::commit()
|
||||
{
|
||||
write_changed_details();
|
||||
md_->commit();
|
||||
}
|
||||
|
||||
void
|
||||
@ -175,6 +227,52 @@ thin_pool::get_transaction_id() const
|
||||
return md_->sb_.trans_id_;
|
||||
}
|
||||
|
||||
void
|
||||
thin_pool::reserve_metadata_snap()
|
||||
{
|
||||
if (md_->sb_.metadata_snap_)
|
||||
throw std::runtime_error("pool metadata snapshot already exists.");
|
||||
|
||||
commit();
|
||||
|
||||
md_->metadata_sm_->inc(superblock_detail::SUPERBLOCK_LOCATION);
|
||||
transaction_manager::write_ref wr = md_->tm_->shadow(
|
||||
superblock_detail::SUPERBLOCK_LOCATION,
|
||||
superblock_validator()).first;
|
||||
|
||||
superblock_detail::superblock sb;
|
||||
superblock_detail::superblock_disk *sbd = reinterpret_cast<superblock_detail::superblock_disk *>(wr.data());
|
||||
superblock_detail::superblock_traits::unpack(*sbd, sb);
|
||||
|
||||
memset(sb.data_space_map_root_, 0, superblock_detail::SPACE_MAP_ROOT_SIZE);
|
||||
memset(sb.metadata_space_map_root_, 0, superblock_detail::SPACE_MAP_ROOT_SIZE);
|
||||
md_->metadata_sm_->inc(sb.data_mapping_root_);
|
||||
md_->metadata_sm_->inc(sb.device_details_root_);
|
||||
|
||||
superblock_detail::superblock_traits::pack(sb, *sbd);
|
||||
|
||||
md_->sb_.metadata_snap_ = wr.get_location();
|
||||
}
|
||||
|
||||
void
|
||||
thin_pool::release_metadata_snap()
|
||||
{
|
||||
if (!md_->sb_.metadata_snap_)
|
||||
throw std::runtime_error("No pool metadata snapshot found");
|
||||
|
||||
superblock_detail::superblock sb = read_superblock(md_->tm_->get_bm(),
|
||||
md_->sb_.metadata_snap_);
|
||||
device_tree dtree(*md_->tm_, sb.device_details_root_,
|
||||
device_tree_detail::device_details_traits::ref_counter());
|
||||
dtree.destroy();
|
||||
mapping_tree mtree(*md_->tm_, sb.data_mapping_root_,
|
||||
mapping_tree_detail::block_traits::ref_counter(md_->tm_->get_sm()));
|
||||
mtree.destroy();
|
||||
md_->metadata_sm_->dec(md_->sb_.metadata_snap_);
|
||||
|
||||
md_->sb_.metadata_snap_ = 0;
|
||||
}
|
||||
|
||||
block_address
|
||||
thin_pool::get_metadata_snap() const
|
||||
{
|
||||
@ -184,7 +282,7 @@ thin_pool::get_metadata_snap() const
|
||||
block_address
|
||||
thin_pool::alloc_data_block()
|
||||
{
|
||||
boost::optional<block_address> mb = md_->data_sm_->new_block();
|
||||
space_map::maybe_block mb = md_->data_sm_->new_block();
|
||||
if (!mb)
|
||||
throw runtime_error("couldn't allocate new block");
|
||||
|
||||
@ -203,7 +301,7 @@ thin_pool::get_nr_free_data_blocks() const
|
||||
return md_->data_sm_->get_nr_free();
|
||||
}
|
||||
|
||||
thin_provisioning::sector_t
|
||||
sector_t
|
||||
thin_pool::get_data_block_size() const
|
||||
{
|
||||
return md_->sb_.data_block_size_;
|
||||
@ -215,17 +313,22 @@ thin_pool::get_data_dev_size() const
|
||||
return md_->data_sm_->get_nr_blocks();
|
||||
}
|
||||
|
||||
uint32_t
|
||||
thin_pool::get_time() const
|
||||
{
|
||||
return md_->sb_.time_;
|
||||
}
|
||||
|
||||
thin::ptr
|
||||
thin_pool::open_thin(thin_dev_t dev)
|
||||
{
|
||||
uint64_t key[1] = {dev};
|
||||
boost::optional<device_tree_detail::device_details> mdetails = md_->details_->lookup(key);
|
||||
if (!mdetails)
|
||||
throw runtime_error("no such device");
|
||||
return open_device(dev);
|
||||
}
|
||||
|
||||
thin *ptr = new thin(dev, this);
|
||||
thin::ptr r(ptr);
|
||||
return r;
|
||||
void
|
||||
thin_pool::close_thin(thin::ptr td)
|
||||
{
|
||||
close_device(td);
|
||||
}
|
||||
|
||||
bool
|
||||
@ -235,4 +338,107 @@ thin_pool::device_exists(thin_dev_t dev) const
|
||||
return !!md_->details_->lookup(key);
|
||||
}
|
||||
|
||||
// Creates a new in-memory thin object for `dev`, registers it in
// thin_devices_ and returns it.
//
// Throws std::runtime_error if `dev` is already present in thin_devices_.
thin::ptr
thin_pool::create_device(thin_dev_t dev)
{
	if (thin_devices_.find(dev) != thin_devices_.end())
		throw std::runtime_error("device already exists");

	thin::ptr fresh(new thin(dev, *this));
	thin_devices_[dev] = fresh;
	return fresh;
}
|
||||
|
||||
// Returns the thin object for `dev`.
//
// If the device is already held in thin_devices_, its open count is
// incremented and the cached object is returned. Otherwise its details are
// looked up in the details tree, a new thin object is built from them, cached
// and returned.
//
// Throws std::runtime_error if the device is neither cached nor present in
// the details tree.
thin::ptr
thin_pool::open_device(thin_dev_t dev)
{
	device_map::iterator cached = thin_devices_.find(dev);

	if (cached == thin_devices_.end()) {
		// Not in memory yet: load it from the details tree.
		uint64_t dev_key[1] = {dev};
		device_tree::maybe_value stored = md_->details_->lookup(dev_key);
		if (!stored)
			throw std::runtime_error("no such device");

		thin::ptr loaded(new thin(dev, *this, *stored));
		thin_devices_[dev] = loaded;
		return loaded;
	}

	thin::ptr existing = cached->second;
	existing->open_count_++;
	return existing;
}
|
||||
|
||||
void
|
||||
thin_pool::close_device(thin::ptr td)
|
||||
{
|
||||
td->open_count_--;
|
||||
}
|
||||
|
||||
void
|
||||
thin_pool::set_snapshot_details(thin::ptr snap, thin_dev_t origin)
|
||||
{
|
||||
thin::ptr o = open_device(origin);
|
||||
o->set_snapshot_time(md_->sb_.time_);
|
||||
snap->set_snapshot_time(md_->sb_.time_);
|
||||
snap->set_mapped_blocks(o->get_mapped_blocks());
|
||||
close_device(o);
|
||||
}
|
||||
|
||||
void
|
||||
thin_pool::write_changed_details()
|
||||
{
|
||||
for (auto it = thin_devices_.cbegin(); it != thin_devices_.cend(); ) {
|
||||
uint64_t key[1] = {it->first};
|
||||
thin::ptr td = it->second;
|
||||
|
||||
if (td->changed_) {
|
||||
md_->details_->insert(key, td->details_);
|
||||
td->changed_ = false;
|
||||
}
|
||||
|
||||
if (!td->open_count_)
|
||||
it = thin_devices_.erase(it);
|
||||
else
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
void
|
||||
thin_provisioning::process_read(thin::ptr td, thin_pool::ptr tp,
|
||||
sector_t offset)
|
||||
{
|
||||
block_address blocknr = base::div_up<sector_t>(offset, tp->get_data_block_size());
|
||||
td->lookup(blocknr);
|
||||
}
|
||||
|
||||
void
|
||||
thin_provisioning::process_write(thin::ptr td, thin_pool::ptr tp,
|
||||
sector_t offset)
|
||||
{
|
||||
block_address blocknr = base::div_up<sector_t>(offset, tp->get_data_block_size());
|
||||
thin::maybe_address result = td->lookup(blocknr);
|
||||
if (!!result && !result->shared_)
|
||||
return;
|
||||
// TODO: handle out-of-space errors
|
||||
block_address data_block = tp->alloc_data_block();
|
||||
td->insert(blocknr, data_block);
|
||||
}
|
||||
|
||||
void
|
||||
thin_provisioning::process_discard(thin::ptr td, thin_pool::ptr tp,
|
||||
sector_t offset)
|
||||
{
|
||||
block_address blocknr = base::div_up<sector_t>(offset, tp->get_data_block_size());
|
||||
thin::maybe_address result = td->lookup(blocknr);
|
||||
if (!result)
|
||||
return;
|
||||
td->remove(blocknr);
|
||||
if (!result->shared_)
|
||||
tp->free_data_block(result->block_);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
@ -33,8 +33,13 @@ namespace thin_provisioning {
|
||||
class thin_pool;
|
||||
class thin {
|
||||
public:
|
||||
struct lookup_result {
|
||||
block_address block_;
|
||||
bool shared_;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<thin> ptr;
|
||||
typedef boost::optional<mapping_tree_detail::block_time> maybe_address;
|
||||
typedef boost::optional<lookup_result> maybe_address;
|
||||
|
||||
thin_dev_t get_dev_t() const;
|
||||
maybe_address lookup(block_address thin_block);
|
||||
@ -48,26 +53,38 @@ namespace thin_provisioning {
|
||||
|
||||
private:
|
||||
friend class thin_pool;
|
||||
thin(thin_dev_t dev, thin_pool *pool); // FIXME: pass a reference rather than a ptr
|
||||
thin(thin_dev_t dev, thin_pool &pool);
|
||||
thin(thin_dev_t dev, thin_pool &pool,
|
||||
device_tree_detail::device_details const &details);
|
||||
|
||||
thin_dev_t dev_;
|
||||
thin_pool *pool_;
|
||||
thin_pool &pool_;
|
||||
device_tree_detail::device_details details_;
|
||||
uint32_t open_count_;
|
||||
bool changed_;
|
||||
};
|
||||
|
||||
class thin_pool {
|
||||
public:
|
||||
typedef std::shared_ptr<thin_pool> ptr;
|
||||
|
||||
thin_pool(metadata::ptr md);
|
||||
thin_pool(block_manager::ptr bm);
|
||||
thin_pool(block_manager::ptr bm,
|
||||
sector_t data_block_size,
|
||||
block_address nr_data_blocks);
|
||||
~thin_pool();
|
||||
|
||||
void create_thin(thin_dev_t dev);
|
||||
void create_snap(thin_dev_t dev, thin_dev_t origin);
|
||||
void del(thin_dev_t);
|
||||
void commit();
|
||||
|
||||
void set_transaction_id(uint64_t id);
|
||||
uint64_t get_transaction_id() const;
|
||||
|
||||
// handling metadata snapshot
|
||||
void reserve_metadata_snap();
|
||||
void release_metadata_snap();
|
||||
block_address get_metadata_snap() const;
|
||||
|
||||
block_address alloc_data_block();
|
||||
@ -77,15 +94,29 @@ namespace thin_provisioning {
|
||||
block_address get_nr_free_data_blocks() const;
|
||||
sector_t get_data_block_size() const;
|
||||
block_address get_data_dev_size() const;
|
||||
uint32_t get_time() const;
|
||||
|
||||
thin::ptr open_thin(thin_dev_t);
|
||||
void close_thin(thin::ptr td);
|
||||
|
||||
private:
|
||||
friend class thin;
|
||||
typedef std::map<thin_dev_t, thin::ptr> device_map;
|
||||
|
||||
bool device_exists(thin_dev_t dev) const;
|
||||
thin::ptr create_device(thin_dev_t dev);
|
||||
thin::ptr open_device(thin_dev_t dev);
|
||||
void close_device(thin::ptr device);
|
||||
void set_snapshot_details(thin::ptr snap, thin_dev_t origin);
|
||||
void write_changed_details();
|
||||
|
||||
metadata::ptr md_;
|
||||
device_map thin_devices_;
|
||||
};
|
||||
|
||||
void process_read(thin::ptr td, thin_pool::ptr tp, sector_t offset);
|
||||
void process_write(thin::ptr td, thin_pool::ptr tp, sector_t offset);
|
||||
void process_discard(thin::ptr td, thin_pool::ptr tp, sector_t offset);
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user