[btree] Implement btree::remove()

This commit is contained in:
Ming-Hung Tsai 2020-06-04 16:31:24 +08:00
parent ef8e94b22c
commit e724a72045
3 changed files with 572 additions and 7 deletions

View File

@ -0,0 +1,373 @@
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
namespace persistent_data {
// Shadows the child referenced by entry |index| of |parent| and rewrites
// that entry to point at the shadow.
//
// tm_.shadow() hands back the write-locked block together with a flag; when
// the flag is set the references held by the shadowed node are incremented
// (internal_rc_ for an internal node, rc_ for a leaf).
// NOTE(review): the flag presumably means "a fresh copy was made" - confirm
// against transaction_manager::shadow().
//
// Returns a shadow_child carrying the write reference and the node type.
template <unsigned Levels, typename ValueTraits>
btree_detail::shadow_child
btree<Levels, ValueTraits>::
create_shadow_child(internal_node &parent,
		    unsigned index)
{
	pair<write_ref, bool> shadowed = tm_.shadow(parent.value_at(index), validator_);
	write_ref &child = shadowed.first;
	bool const needs_inc = shadowed.second;

	node_ref<block_traits> as_internal = to_node<block_traits>(child);
	bool const is_internal = as_internal.get_type() == btree_detail::INTERNAL;

	if (needs_inc) {
		if (is_internal)
			as_internal.inc_children(internal_rc_);
		else {
			// Leaves hold ValueTraits values, so they need the value ref counter.
			node_ref<ValueTraits> as_leaf = to_node<ValueTraits>(child);
			as_leaf.inc_children(rc_);
		}
	}

	// Re-point the parent at the (possibly relocated) shadow.
	parent.set_value(index, child.get_location());

	btree_detail::node_type const type =
		is_internal ? btree_detail::INTERNAL : btree_detail::LEAF;
	return btree_detail::shadow_child(child, type);
}
// Removes the entry addressed by |key| (one component per tree level).
//
// Each of the first Levels - 1 components is looked up with
// remove_location<block_traits>(); the value found there is used as the
// block to start the next level from.  The final component is looked up with
// remove_location<ValueTraits>() and, if present, deleted from its leaf.
// A missing component at any level stops the walk without deleting anything.
// root_ is always refreshed from the spine afterwards.
//
// NOTE(review): the value being deleted is not handed to rc_ before
// delete_at(); confirm whether a reference-count decrement is expected.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::
remove(key const &key)
{
	using namespace btree_detail;

	shadow_spine spine(tm_, validator_);
	block_address current = root_;
	unsigned slot = 0;
	bool found = true;

	// Outer levels: their leaves store the block of the next level down.
	unsigned level = 0;
	while (found && level < Levels - 1) {
		found = remove_location<block_traits>(spine, current,
						      key[level], &slot,
						      internal_rc_);
		if (found) {
			internal_node n = spine.get_node<block_traits>();
			current = n.value_at(slot);
		}
		++level;
	}

	// Bottom level: locate the value itself and drop it.
	if (found &&
	    remove_location<ValueTraits>(spine, current,
					 key[Levels - 1], &slot,
					 rc_)) {
		leaf_node leaf = spine.get_node<ValueTraits>();
		leaf.delete_at(slot);
	}

	root_ = spine.get_root();
}
// Walks one tree level from |block| down to the leaf that should hold |key|,
// shadowing every node on the way and rebalancing the children of each
// internal node before descending into them.
//
// spine   - shadow spine shared across levels; it may already hold the leaf
//           of the previous level as the parent of |block|.
// block   - block to start from.
// key     - key to look for.
// index   - in: slot of the spine's current parent that references |block|
//           (ignored when the spine has no parent yet);
//           out: position of |key| in the leaf when true is returned.
// leaf_rc - reference counter applied to the values of a leaf whose shadow
//           reports that its children need incrementing.
//
// Returns true when the key was found, false otherwise.
template <unsigned Levels, typename _>
template <typename ValueTraits, typename RC>
bool
btree<Levels, _>::
remove_location(btree_detail::shadow_spine &spine,
		block_address block,
		uint64_t key,
		unsigned *index,
		RC &leaf_rc)
{
	using namespace btree_detail;

	// Slot in the parent that references |block|.  For nested levels the
	// parent is the previous level's leaf and the slot is the one the
	// caller just looked up, so it must come from *index rather than 0.
	unsigned i = *index;

	for (;;) {
		bool const copied = spine.step(block);

		// patch up the parent to point to the new shadow
		if (spine.has_parent()) {
			internal_node p = spine.get_parent();
			p.set_value(i, spine.get_block());
		}

		internal_node n = spine.get_node<block_traits>();
		bool is_leaf = n.get_type() == btree_detail::LEAF;

		// Same bookkeeping create_shadow_child() performs: when the shadow
		// reports that its children need incrementing, bump their counts.
		if (copied) {
			if (is_leaf) {
				node_ref<ValueTraits> leaf = spine.get_node<ValueTraits>();
				leaf.inc_children(leaf_rc);
			} else
				n.inc_children(internal_rc_);
		}

		if (!is_leaf) {
			if (!rebalance_children<ValueTraits>(spine, key))
				return false;

			// rebalance_children() may have replaced this node's
			// contents with those of its only child, so look again.
			n = spine.get_node<block_traits>();
			is_leaf = n.get_type() == btree_detail::LEAF;
		}

		if (is_leaf) {
			node_ref<ValueTraits> leaf = spine.get_node<ValueTraits>();
			boost::optional<unsigned> idx = leaf.exact_search(key);
			if (!idx)
				return false;

			*index = *idx;
			return true;
		}

		// A negative result means the key sorts before every entry of
		// this node, i.e. it is not in the tree.
		int const next = n.lower_bound(key);
		if (next < 0)
			return false;

		i = static_cast<unsigned>(next);
		block = n.value_at(i);
	}
}
// Prepares the children of the spine's current internal node for a descent
// towards |key|.
//
// - A node with a single entry is overwritten with the contents of its only
//   child, and the child's block is released through the space map.
// - Otherwise the child selected by lower_bound(key) is rebalanced together
//   with its neighbour(s): two-way at either end of the node, three-way in
//   the middle.
//
// Returns false when lower_bound() reports a negative position, true
// otherwise.
template <unsigned Levels, typename _>
template <typename ValueTraits>
bool
btree<Levels, _>::
rebalance_children(btree_detail::shadow_spine &spine, uint64_t key)
{
	internal_node node = spine.get_node<block_traits>();
	unsigned const nr_entries = node.get_nr_entries();

	if (nr_entries == 1) {
		read_ref only_child = tm_.read_lock(node.value_at(0), validator_);

		// FIXME: is it safe?
		::memcpy(node.raw(), only_child.data(), read_ref::BLOCK_SIZE);

		tm_.get_sm()->dec(only_child.get_location());
		return true;
	}

	int const pos = node.lower_bound(key);
	if (pos < 0)
		return false;

	unsigned const child = static_cast<unsigned>(pos);
	if (child == 0)
		// no left sibling: pair up with the right one
		rebalance2<ValueTraits>(spine, child);
	else if (child >= nr_entries - 1)
		// no right sibling: pair up with the left one
		rebalance2<ValueTraits>(spine, child - 1);
	else
		rebalance3<ValueTraits>(spine, child - 1);

	return true;
}
// Shadows the two children at |left_index| and |left_index + 1| of the
// spine's current node and rebalances them against each other.  The type of
// the left child decides whether the pair is handled as internal nodes or as
// leaves of ValueTraits.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
rebalance2(btree_detail::shadow_spine &spine, unsigned left_index)
{
	internal_node parent = spine.get_node<block_traits>();

	shadow_child lhs = create_shadow_child(parent, left_index);
	shadow_child rhs = create_shadow_child(parent, left_index + 1);

	// FIXME: ugly
	if (lhs.get_type() != btree_detail::INTERNAL) {
		node_ref<ValueTraits> l = lhs.get_node<ValueTraits>();
		node_ref<ValueTraits> r = rhs.get_node<ValueTraits>();
		__rebalance2(parent, l, r, left_index);
		return;
	}

	internal_node l = lhs.get_node<block_traits>();
	internal_node r = rhs.get_node<block_traits>();
	__rebalance2(parent, l, r, left_index);
}
// Rebalances two adjacent siblings whose parent slots are |left_index| and
// |left_index + 1|.
//
// If both together hold fewer than 2 * (merge_threshold() + 1) entries, the
// right node is folded into the left one, its slot is removed from the
// parent and its block is released through the space map.  Otherwise entries
// are moved so that the left node holds half of the total (rounded down) and
// the parent's key for the right node is refreshed.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
__rebalance2(internal_node &parent,
	     node_ref<ValueTraits> &left,
	     node_ref<ValueTraits> &right,
	     unsigned left_index)
{
	unsigned const nr_left = left.get_nr_entries();
	unsigned const nr_right = right.get_nr_entries();
	unsigned const total = nr_left + nr_right;
	unsigned const right_index = left_index + 1;
	unsigned const merge_limit = 2 * (left.merge_threshold() + 1);

	if (total >= merge_limit) {
		// Enough entries for two nodes: even them out.  The difference
		// is unsigned and is converted to the signed count expected by
		// move_entries(), exactly as before.
		unsigned const target_left = total / 2;
		left.move_entries(right, nr_left - target_left);
		parent.set_key(right_index, right.key_at(0));
		return;
	}

	// Merge the right child into the left
	left.copy_entries_to_left(right, nr_right);
	left.set_nr_entries(total);
	parent.delete_at(right_index);
	tm_.get_sm()->dec(right.get_location());
}
// Shadows the three children at |left_index|, |left_index + 1| and
// |left_index + 2| of the spine's current node and rebalances them as a
// group.  The type of the left child decides whether the group is handled as
// internal nodes or as leaves of ValueTraits.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
rebalance3(btree_detail::shadow_spine &spine, unsigned left_index)
{
	internal_node parent = spine.get_node<block_traits>();

	shadow_child lhs = create_shadow_child(parent, left_index);
	shadow_child mid = create_shadow_child(parent, left_index + 1);
	shadow_child rhs = create_shadow_child(parent, left_index + 2);

	// FIXME: ugly
	if (lhs.get_type() != btree_detail::INTERNAL) {
		node_ref<ValueTraits> l = lhs.get_node<ValueTraits>();
		node_ref<ValueTraits> c = mid.get_node<ValueTraits>();
		node_ref<ValueTraits> r = rhs.get_node<ValueTraits>();
		__rebalance3(parent, l, c, r, left_index);
		return;
	}

	internal_node l = lhs.get_node<block_traits>();
	internal_node c = mid.get_node<block_traits>();
	internal_node r = rhs.get_node<block_traits>();
	__rebalance3(parent, l, c, r, left_index);
}
// Chooses how to rebalance three adjacent siblings: when their combined
// entry count is below 4 * merge_threshold() + 1 the center node is
// eliminated, otherwise the entries are spread across all three nodes.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
__rebalance3(internal_node &parent,
	     node_ref<ValueTraits> &left,
	     node_ref<ValueTraits> &center,
	     node_ref<ValueTraits> &right,
	     unsigned left_index)
{
	unsigned total = left.get_nr_entries();
	total += center.get_nr_entries();
	total += right.get_nr_entries();

	unsigned const keep_three = left.merge_threshold() * 4 + 1;

	if (total >= keep_three)
		redistribute3(parent, left, center, right, left_index);
	else
		delete_center_node(parent, left, center, right, left_index);
}
// Eliminates the center node of three adjacent siblings.
//
// As many center entries as fit are appended to the left node; whatever
// remains is prepended to the right node.  The center's slot is then removed
// from the parent, its block is released through the space map, and the two
// surviving siblings are rebalanced against each other.
//
// parent     - node holding the three siblings at left_index,
//              left_index + 1 and left_index + 2.
// left_index - parent slot of the left sibling.
//
// Throws std::runtime_error("too many entries") if either surviving node
// would exceed left.get_max_entries().
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
delete_center_node(internal_node &parent,
		   node_ref<ValueTraits> &left,
		   node_ref<ValueTraits> &center,
		   node_ref<ValueTraits> &right,
		   unsigned left_index)
{
	unsigned const center_index = left_index + 1;
	unsigned const right_index = left_index + 2;
	unsigned const max_entries = left.get_max_entries();
	unsigned const nr_left = left.get_nr_entries();
	unsigned const nr_center = center.get_nr_entries();
	unsigned const nr_right = right.get_nr_entries();

	// Front of the center node goes to the end of the left node.
	unsigned const to_left = std::min(max_entries - nr_left, nr_center);
	if (nr_left + to_left > max_entries)
		throw std::runtime_error("too many entries");

	left.copy_entries_to_left(center, to_left);
	left.set_nr_entries(nr_left + to_left);

	// Whatever did not fit goes to the front of the right node.
	unsigned const to_right = nr_center - to_left;
	if (to_right) {
		if ((nr_right + to_right) > max_entries)
			throw std::runtime_error("too many entries");

		right.shift_entries_right(to_right);
		center.copy_entries_to_right(right, to_right);
		right.set_nr_entries(nr_right + to_right);
	}

	// Refresh the right node's key while it still sits at right_index,
	// then drop the center slot (which moves the right node down by one).
	parent.set_key(right_index, right.key_at(0));
	parent.delete_at(center_index);

	tm_.get_sm()->dec(center.get_location());

	// __rebalance2() derives the right node's slot from left_index itself,
	// so no local index needs adjusting here.
	__rebalance2(parent, left, right, left_index);
}
// Spreads the entries of three adjacent siblings so that each ends up with
// roughly a third of the total, then refreshes the parent's keys for the
// center and right nodes.
//
// parent     - node whose slots left_index + 1 and left_index + 2 receive
//              the new first keys of center and right.
// left_index - parent slot of the left sibling.
//
// Throws std::runtime_error("too many entries") if a target size exceeds
// left.get_max_entries().
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
redistribute3(internal_node &parent,
node_ref<ValueTraits> &left,
node_ref<ValueTraits> &center,
node_ref<ValueTraits> &right,
unsigned left_index)
{
unsigned center_index = left_index + 1;
unsigned right_index = left_index + 2;
unsigned nr_left = left.get_nr_entries();
unsigned nr_center = center.get_nr_entries();
unsigned nr_right = right.get_nr_entries();
unsigned max_entries = left.get_max_entries();
unsigned total = nr_left + nr_center + nr_right;
// The right node is aimed at a third of the total (rounded down); the left
// node gets one more entry when the total is not a multiple of three.
unsigned target_right = total / 3;
unsigned remainder = (target_right * 3) != total;
unsigned target_left = target_right + remainder;
if (target_left > max_entries || target_right > max_entries)
throw std::runtime_error("too many entries");
if (nr_left < nr_right) {
// s is the signed number of entries to move from left to center;
// a negative value means left has to pull entries in.
int s = nr_left - target_left;
// FIXME: signed & unsigned comparison
if (s < 0 && nr_center < static_cast<unsigned>(-s)) {
// not enough in central node
// NOTE(review): nr_center is unsigned, so -nr_center only becomes the
// intended negative count through the conversion to move_entries()'s
// int parameter.
left.move_entries(center, -nr_center);
s += nr_center;
// Take the rest of the shortfall straight from the right node and
// account for it in the local copy of its size.
left.move_entries(right, s);
nr_right += s;
} else
left.move_entries(center, s);
// Bring the right node to its target size via the center node.
center.move_entries(right, target_right - nr_right);
} else {
// s is the signed number of entries to move from center to right.
int s = target_right - nr_right;
if (s > 0 && nr_center < static_cast<unsigned>(s)) {
// not enough in central node
center.move_entries(right, nr_center);
s -= nr_center;
// Take the rest straight from the left node and account for it in the
// local copy of its size.
left.move_entries(right, s);
nr_left -= s;
} else
center.move_entries(right, s);
// Bring the left node to its target size via the center node.
left.move_entries(center, nr_left - target_left);
}
// The first keys of center and right may have changed; update the parent.
parent.set_key(center_index, center.key_at(0));
parent.set_key(right_index, right.key_at(0));
}
};

View File

@ -110,12 +110,34 @@ namespace persistent_data {
uint64_t key,
typename ValueTraits::value_type const &v);
// Removes the entry at index i: the entries after it are moved down by
// one and nr_entries is decremented.  Throws runtime_error if i is not
// a valid entry index.
void delete_at(unsigned i);
// Copies entries from another node, appends them
// to the back of this node. Adjusts nr_entries.
void copy_entries(node_ref const &rhs,
unsigned begin,
unsigned end);
// Moves entries between this node and its right sibling rhs while
// maintaining the key ordering.
// A positive count moves that many entries from the end of this node
// to the front of rhs; a negative count moves -count entries from the
// front of rhs to the end of this node; zero does nothing.
// The nr_entries of both nodes are adjusted.
void move_entries(node_ref &rhs,
int count);
// copy_entries_to_left copies count entries from the beginning of rhs
// to the end of this node; copy_entries_to_right copies the last count
// entries of this node to the beginning of rhs (the caller must have
// made room there first, e.g. with shift_entries_right).
// Neither call adjusts nr_entries.
void copy_entries_to_left(node_ref const &rhs, unsigned count);
void copy_entries_to_right(node_ref &rhs, unsigned count) const;
// Shifts the entries of this node towards the front (left) or towards
// the back (right) by the given number of slots.
// The nr_entries is not adjusted.
void shift_entries_left(unsigned shift);
void shift_entries_right(unsigned shift);
// Returns a third of get_max_entries(); the rebalancing code derives
// its merge-or-redistribute thresholds from this value.
unsigned merge_threshold() const;
// Various searches
int bsearch(uint64_t key, int want_hi) const;
boost::optional<unsigned> exact_search(uint64_t key) const;
@ -259,6 +281,26 @@ namespace persistent_data {
maybe_block root_;
};
// Couples the write reference of a shadowed child block with the node type
// (as decided by whoever created the shadow), so callers can obtain a
// correctly typed node_ref for it later.
class shadow_child {
public:
	// Keeps its own copy of |wr| together with |type|.
	shadow_child(block_manager::write_ref &wr, node_type type)
		: wr_(wr),
		  type_(type)
	{
	}

	// Node type recorded at construction.
	node_type get_type() const { return type_; }

	// Views the shadowed block as a node holding ValueTraits values.
	template <typename ValueTraits>
	node_ref<ValueTraits> get_node() { return to_node<ValueTraits>(wr_); }

private:
	block_manager::write_ref wr_;
	node_type type_;
};
// Used to keep a record of a nested btree's position, stored as a sequence
// of 64-bit values.
// NOTE(review): presumably one key per level walked so far - confirm.
typedef std::vector<uint64_t> btree_path;
@ -399,6 +441,14 @@ namespace persistent_data {
int *index,
RC &leaf_rc);
// Descends one tree level from |block| looking for |key|, shadowing the
// visited nodes through |spine| and rebalancing children on the way.
// Returns true and stores the key's position within its leaf in *index
// when the key is found; returns false otherwise.
// leaf_rc is the reference counter the caller supplies for leaf values.
template <typename ValueTraits2, typename RC>
bool
remove_location(btree_detail::shadow_spine &spine,
block_address block,
uint64_t key,
unsigned *index,
RC &leaf_rc);
void walk_tree(visitor &visitor,
btree_detail::node_location const &loc,
block_address b) const;
@ -411,6 +461,53 @@ namespace persistent_data {
void inc_children(btree_detail::shadow_spine &spine,
RefCounter &leaf_rc);
// Shadows the child referenced by entry |index| of |parent|, re-points
// that entry at the shadow and returns the shadow with its node type.
btree_detail::shadow_child
create_shadow_child(internal_node &parent,
unsigned index);
// Rebalances the children of the spine's current internal node around
// the child that |key| leads to.  Returns false when the key sorts
// before every entry of the node.
template <typename ValueTraits2>
bool rebalance_children(btree_detail::shadow_spine &spine,
uint64_t key);
// Shadows and rebalances the two children at left_index and
// left_index + 1 of the spine's current node.
template <typename ValueTraits2>
void rebalance2(btree_detail::shadow_spine &spine,
unsigned left_index);
// Shadows and rebalances the three children at left_index,
// left_index + 1 and left_index + 2 of the spine's current node.
template <typename ValueTraits2>
void rebalance3(btree_detail::shadow_spine &spine,
unsigned left_index);
// Merges |right| into |left| when their combined entry count is small
// enough, otherwise evens out their entry counts; keeps |parent| in
// step either way.
template <typename ValueTraits2>
void
__rebalance2(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
// Picks between delete_center_node() and redistribute3() based on the
// combined entry count of the three siblings.
template <typename ValueTraits2>
void
__rebalance3(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &center,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
// Moves every entry of |center| into |left| and |right|, removes the
// center's slot from |parent|, then rebalances the remaining pair.
template <typename ValueTraits2>
void
delete_center_node(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &center,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
// Spreads the entries of the three siblings roughly evenly and updates
// the keys |parent| holds for |center| and |right|.
template <typename ValueTraits2>
void
redistribute3(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &center,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
transaction_manager &tm_;
bool destroy_;
block_address root_;
@ -421,6 +518,7 @@ namespace persistent_data {
};
#include "btree.tcc"
#include "btree-remove.tcc"
//----------------------------------------------------------------

View File

@ -293,6 +293,23 @@ namespace persistent_data {
set_value(i, v);
}
// Removes the entry at position |i|, sliding every later key and value down
// by one slot, and decrements nr_entries.
// Throws runtime_error("key index out of bounds") when |i| does not refer to
// an existing entry.
template <typename ValueTraits>
void
node_ref<ValueTraits>::delete_at(unsigned i)
{
	unsigned const count = get_nr_entries();
	if (!(i < count))
		throw runtime_error("key index out of bounds");

	// Number of entries that sit after the one being removed.
	unsigned const tail = count - i - 1;
	if (tail > 0) {
		::memmove(key_ptr(i), key_ptr(i + 1),
			  tail * sizeof(uint64_t));
		::memmove(value_ptr(i), value_ptr(i + 1),
			  tail * sizeof(typename ValueTraits::disk_type));
	}

	set_nr_entries(count - 1);
}
template <typename ValueTraits>
void
node_ref<ValueTraits>::copy_entries(node_ref const &rhs,
@ -309,6 +326,90 @@ namespace persistent_data {
set_nr_entries(n + count);
}
// Moves entries between this node (the left sibling) and |rhs| (the right
// sibling) while keeping the key order intact.
//
// count > 0 : the last |count| entries of this node become the first
//             entries of rhs.
// count < 0 : the first -count entries of rhs are appended to this node.
// count == 0: nothing happens.
//
// nr_entries of both nodes is updated.
// Throws runtime_error("too many entries") when the donor does not hold
// enough entries or the receiver would exceed get_max_entries().
template <typename ValueTraits>
void
node_ref<ValueTraits>::move_entries(node_ref<ValueTraits> &rhs,
				    int count)
{
	if (!count)
		return;

	unsigned const nr_left = get_nr_entries();
	unsigned const nr_right = rhs.get_nr_entries();
	unsigned const max_entries = get_max_entries();

	if (count > 0) {
		unsigned const n = static_cast<unsigned>(count);

		// Left donates: it must own n entries and rhs must have room
		// for them (rhs ends up with nr_right + n).
		if (n > nr_left || nr_right > max_entries || n > max_entries - nr_right)
			throw runtime_error("too many entries");

		rhs.shift_entries_right(n);
		copy_entries_to_right(rhs, n);

		set_nr_entries(nr_left - n);
		rhs.set_nr_entries(nr_right + n);
	} else {
		// Negate in unsigned arithmetic so the most negative int is
		// handled without signed overflow.
		unsigned const n = 0u - static_cast<unsigned>(count);

		// Right donates: it must own n entries and this node must have
		// room for them (it ends up with nr_left + n).
		if (n > nr_right || nr_left > max_entries || n > max_entries - nr_left)
			throw runtime_error("too many entries");

		copy_entries_to_left(rhs, n);
		rhs.shift_entries_left(n);

		set_nr_entries(nr_left + n);
		rhs.set_nr_entries(nr_right - n);
	}
}
// Appends the first |count| entries of |rhs| after the last entry of this
// node.  nr_entries is left untouched; the caller updates it.
// Throws runtime_error("too many entries") when the entries would not fit
// within get_max_entries().
template <typename ValueTraits>
void
node_ref<ValueTraits>::copy_entries_to_left(node_ref const &rhs, unsigned count)
{
	unsigned const used = get_nr_entries();
	if (get_max_entries() < (used + count))
		throw runtime_error("too many entries");

	::memcpy(key_ptr(used), rhs.key_ptr(0),
		 count * sizeof(uint64_t));
	::memcpy(value_ptr(used), rhs.value_ptr(0),
		 count * sizeof(typename ValueTraits::disk_type));
}
// Copies the last |count| entries of this node into the first |count| slots
// of |rhs|.  The caller is expected to have made room there beforehand
// (shift_entries_right); nr_entries of neither node is adjusted.
// Throws runtime_error("too many entries") when rhs could not hold the extra
// entries or when this node holds fewer than |count| entries.
template <typename ValueTraits>
void
node_ref<ValueTraits>::copy_entries_to_right(node_ref &rhs, unsigned count) const
{
	unsigned const nr_right = rhs.get_nr_entries();
	if ((nr_right + count) > get_max_entries())
		throw runtime_error("too many entries");

	// Without this guard nr_left - count would wrap around and the copy
	// would read from far outside this node.
	unsigned const nr_left = get_nr_entries();
	if (count > nr_left)
		throw runtime_error("too many entries");

	unsigned const first = nr_left - count;
	::memcpy(rhs.key_ptr(0), key_ptr(first),
		 sizeof(uint64_t) * count);
	::memcpy(rhs.value_ptr(0), value_ptr(first),
		 sizeof(typename ValueTraits::disk_type) * count);
}
// Slides the entries of this node |shift| slots towards the front,
// overwriting the first |shift| entries.  nr_entries is not adjusted.
// Throws runtime_error("too many entries") when |shift| exceeds the number
// of entries held.
template <typename ValueTraits>
void
node_ref<ValueTraits>::shift_entries_left(unsigned shift)
{
	unsigned const held = get_nr_entries();
	if (held < shift)
		throw runtime_error("too many entries");

	unsigned const remaining = held - shift;
	::memmove(key_ptr(0), key_ptr(shift),
		  remaining * sizeof(uint64_t));
	::memmove(value_ptr(0), value_ptr(shift),
		  remaining * sizeof(typename ValueTraits::disk_type));
}
// Slides every entry of this node |shift| slots towards the back, opening a
// gap of |shift| slots at the front.  nr_entries is not adjusted.
// Throws runtime_error("too many entries") when the shifted entries would
// extend past get_max_entries().
template <typename ValueTraits>
void
node_ref<ValueTraits>::shift_entries_right(unsigned shift)
{
	unsigned const held = get_nr_entries();
	if (get_max_entries() < (held + shift))
		throw runtime_error("too many entries");

	::memmove(key_ptr(shift), key_ptr(0),
		  held * sizeof(uint64_t));
	::memmove(value_ptr(shift), value_ptr(0),
		  held * sizeof(typename ValueTraits::disk_type));
}
// Returns one third of the node's capacity (integer division); the
// rebalancing code builds its merge thresholds from this value.
template <typename ValueTraits>
unsigned
node_ref<ValueTraits>::merge_threshold() const
{
	unsigned const capacity = get_max_entries();
	return capacity / 3;
}
template <typename ValueTraits>
int
node_ref<ValueTraits>::bsearch(uint64_t key, int want_hi) const
@ -601,13 +702,6 @@ namespace persistent_data {
return need_insert;
}
// NOTE(review): empty remove() stub.  The hunk header above (13 old lines,
// 6 new) suggests these are the lines this commit deletes in favour of the
// full btree::remove() implementation - confirm before keeping both, since
// two definitions of the same member would conflict.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::remove(key const &key)
{
using namespace btree_detail;
}
template <unsigned Levels, typename ValueTraits>
block_address
btree<Levels, ValueTraits>::get_root() const