// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "base/endian_utils.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/space-maps/disk_structures.h"
#include "persistent-data/space-maps/recursive.h"
#include "persistent-data/space-maps/careful_alloc.h"
#include "persistent-data/data-structures/btree_damage_visitor.h"
#include "persistent-data/data-structures/btree_counter.h"
#include "persistent-data/checksum.h"
#include "persistent-data/math_utils.h"
#include "persistent-data/transaction_manager.h"
using namespace persistent_data;
using namespace std;
using namespace sm_disk_detail;
//----------------------------------------------------------------
namespace {
uint64_t const BITMAP_CSUM_XOR = 240779;
struct bitmap_block_validator : public bcache::validator {
virtual void check(void const *raw, block_address location) const {
bitmap_header const *data = reinterpret_cast(raw);
crc32c sum(BITMAP_CSUM_XOR);
sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu(data->csum))
throw checksum_error("bad checksum in space map bitmap");
if (to_cpu(data->blocknr) != location)
throw checksum_error("bad block nr in space map bitmap");
}
virtual bool check_raw(void const *raw) const {
bitmap_header const *data = reinterpret_cast(raw);
crc32c sum(BITMAP_CSUM_XOR);
sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu(data->csum))
return false;
return true;
}
virtual void prepare(void *raw, block_address location) const {
bitmap_header *data = reinterpret_cast(raw);
data->blocknr = to_disk(location);
crc32c sum(BITMAP_CSUM_XOR);
sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t));
data->csum = to_disk(sum.get_sum());
}
};
//--------------------------------
// FIXME: factor out the common code in these validators
struct index_block_validator : public bcache::validator {
virtual void check(void const *raw, block_address location) const {
metadata_index const *mi = reinterpret_cast(raw);
crc32c sum(INDEX_CSUM_XOR);
sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu(mi->csum_))
throw checksum_error("bad checksum in metadata index block");
if (to_cpu(mi->blocknr_) != location)
throw checksum_error("bad block nr in metadata index block");
}
virtual bool check_raw(void const *raw) const {
metadata_index const *mi = reinterpret_cast(raw);
crc32c sum(INDEX_CSUM_XOR);
sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu(mi->csum_))
return false;
return true;
}
virtual void prepare(void *raw, block_address location) const {
metadata_index *mi = reinterpret_cast(raw);
mi->blocknr_ = to_disk(location);
crc32c sum(INDEX_CSUM_XOR);
sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t));
mi->csum_ = to_disk(sum.get_sum());
}
};
//--------------------------------
class bitmap {
public:
typedef transaction_manager::read_ref read_ref;
typedef transaction_manager::write_ref write_ref;
bitmap(transaction_manager &tm,
index_entry const &ie,
bcache::validator::ptr v)
: tm_(tm),
validator_(v),
ie_(ie) {
}
ref_t lookup(unsigned b) const {
read_ref rr = tm_.read_lock(ie_.blocknr_, validator_);
return __lookup_raw(bitmap_data(rr), b);
}
void insert(unsigned b, ref_t n) {
write_ref wr = tm_.shadow(ie_.blocknr_, validator_).first;
void *bits = bitmap_data(wr);
bool was_free = !test_bit_le(bits, b * 2) && !test_bit_le(bits, b * 2 + 1);
if (n == 1 || n == 3)
set_bit_le(bits, b * 2 + 1);
else
clear_bit_le(bits, b * 2 + 1);
if (n == 2 || n == 3)
set_bit_le(bits, b * 2);
else
clear_bit_le(bits, b * 2);
ie_.blocknr_ = wr.get_location();
if (was_free && n > 0) {
ie_.nr_free_--;
if (b == ie_.none_free_before_)
ie_.none_free_before_++;
}
if (!was_free && n == 0) {
ie_.nr_free_++;
if (b < ie_.none_free_before_)
ie_.none_free_before_ = b;
}
}
boost::optional find_free(unsigned begin, unsigned end) {
read_ref rr = tm_.read_lock(ie_.blocknr_, validator_);
void const *bits = bitmap_data(rr);
for (unsigned i = max(begin, ie_.none_free_before_); i < end; i++)
if (__lookup_raw(bits, i) == 0)
return boost::optional(i);
return boost::optional();
}
index_entry const &get_ie() const {
return ie_;
}
void iterate(block_address offset, block_address hi, space_map::iterator &it) const {
read_ref rr = tm_.read_lock(ie_.blocknr_, validator_);
void const *bits = bitmap_data(rr);
for (unsigned b = 0; b < hi; b++) {
ref_t b1 = test_bit_le(bits, b * 2);
ref_t b2 = test_bit_le(bits, b * 2 + 1);
ref_t result = b2 ? 1 : 0;
result |= b1 ? 2 : 0;
it(offset + b, result);
}
}
private:
void *bitmap_data(transaction_manager::write_ref &wr) {
bitmap_header *h = reinterpret_cast(wr.data());
return h + 1;
}
void const *bitmap_data(transaction_manager::read_ref &rr) const {
bitmap_header const *h = reinterpret_cast(rr.data());
return h + 1;
}
ref_t __lookup_raw(void const *bits, unsigned b) const {
ref_t b1 = test_bit_le(bits, b * 2);
ref_t b2 = test_bit_le(bits, b * 2 + 1);
ref_t result = b2 ? 1 : 0;
result |= b1 ? 2 : 0;
return result;
}
transaction_manager &tm_;
bcache::validator::ptr validator_;
index_entry ie_;
};
struct ref_count_traits {
typedef le32 disk_type;
typedef uint32_t value_type;
typedef no_op_ref_counter ref_counter;
static void unpack(disk_type const &d, value_type &v) {
v = to_cpu(d);
}
static void pack(value_type const &v, disk_type &d) {
d = to_disk(v);
}
};
// Compiled out: a btree_checker specialisation for the ref count tree.
// Its only mention is in the (also compiled out) body of sm_disk::visit().
#if 0
class ref_count_checker : public btree_checker<1, ref_count_traits> {
public:
typedef boost::shared_ptr ptr;
ref_count_checker(block_counter &counter)
: btree_checker<1, ref_count_traits>(counter) {
}
};
#endif
class index_entry_visitor {
public:
virtual ~index_entry_visitor() {}
virtual void visit(index_entry const &ie) = 0;
virtual void visit(run const &missing) = 0;
};
// Abstract storage for the index entries (one per bitmap block) of a disk
// space map.  sm_disk goes through this interface so the entries can be kept
// either in a btree or in a single index block.
class index_store {
public:
	typedef boost::shared_ptr<index_store> ptr;

	// Instances are owned through pointers to this base class, so give it
	// a virtual destructor, as index_entry_visitor already has.
	virtual ~index_store() {}

	// Increments |bc| for every metadata block the store refers to.
	virtual void count_metadata(block_counter &bc) const = 0;

	// Makes room for |nr_indexes| index entries.
	virtual void resize(block_address nr_indexes) = 0;

	// Returns the index entry for bitmap number |b|.
	virtual index_entry find_ie(block_address b) const = 0;

	// Records |ie| as the index entry for bitmap number |b|.
	virtual void save_ie(block_address b, struct index_entry ie) = 0;

	// Writes out any index entries that are only held in memory.
	virtual void commit_ies() = 0;

	// Returns a new store opened on this store's current root.
	virtual ptr clone() const = 0;

	// Block holding the root of the store.
	virtual block_address get_root() const = 0;

	// Presents the stored index entries to |v|.
	virtual void visit(index_entry_visitor &v, block_address nr_index_entries) const = 0;
};
// Number of entries held by one bitmap block.  Entries are 2 bits wide, so
// there are 4 per byte of the space that follows the bitmap_header.
unsigned const ENTRIES_PER_BLOCK = (MD_BLOCK_SIZE - sizeof(bitmap_header)) * 4;
// Space map whose reference counts live on disk.
//
// Counts of 0, 1 and 2 are stored directly in 2 bit bitmap entries.  A
// bitmap value of 3 means the real count is held in the ref_counts_ btree,
// keyed by block number.  The bitmaps are located through an index_store.
class sm_disk : public checked_space_map {
public:
	typedef boost::shared_ptr<sm_disk> ptr;
	typedef transaction_manager::read_ref read_ref;
	typedef transaction_manager::write_ref write_ref;

	// Creates an empty space map (zero blocks) with a new ref count tree.
	sm_disk(index_store::ptr indexes,
		transaction_manager &tm)
		: tm_(tm),
		  bitmap_validator_(new bitmap_block_validator),
		  indexes_(indexes),
		  nr_blocks_(0),
		  nr_allocated_(0),
		  search_start_(0),
		  ref_counts_(tm_, ref_count_traits::ref_counter()) {
	}

	// Opens an existing space map described by |root|.
	sm_disk(index_store::ptr indexes,
		transaction_manager &tm,
		sm_root const &root)
		: tm_(tm),
		  bitmap_validator_(new bitmap_block_validator),
		  indexes_(indexes),
		  nr_blocks_(root.nr_blocks_),
		  nr_allocated_(root.nr_allocated_),
		  search_start_(0),
		  ref_counts_(tm_, root.ref_count_root_, ref_count_traits::ref_counter()) {
	}

	block_address get_nr_blocks() const {
		return nr_blocks_;
	}

	block_address get_nr_free() const {
		return nr_blocks_ - nr_allocated_;
	}

	// Returns the reference count of |b|.  Throws if |b| is out of bounds.
	ref_t get_count(block_address b) const {
		ref_t count = lookup_bitmap(b);
		if (count == 3)
			return lookup_ref_count(b);

		return count;
	}

	// Replaces the count of |b| with m(old count), keeping the bitmap, the
	// ref count tree, the index entry and the allocation totals in step.
	// Throws if |b| is out of bounds.
	template <typename Mut>
	void modify_count(block_address b, Mut const &m) {
		check_block(b);

		index_entry ie = indexes_->find_ie(b / ENTRIES_PER_BLOCK);
		bitmap bm(tm_, ie, bitmap_validator_);

		ref_t old = bm.lookup(b % ENTRIES_PER_BLOCK);
		if (old == 3)
			old = lookup_ref_count(b);

		ref_t c = m(old);
		if (c > 2) {
			// The count lives in the tree; the bitmap only needs
			// touching if it did not already say so.
			if (old < 3) {
				bm.insert(b % ENTRIES_PER_BLOCK, 3);
				indexes_->save_ie(b / ENTRIES_PER_BLOCK, bm.get_ie());
			}

			insert_ref_count(b, c);

		} else {
			if (old > 2)
				remove_ref_count(b);

			bm.insert(b % ENTRIES_PER_BLOCK, c);
			indexes_->save_ie(b / ENTRIES_PER_BLOCK, bm.get_ie());
		}

		if (old == 0) {
			// Only a genuine free -> used transition allocates a
			// block; setting a free block's count to 0 must not
			// bump the total.
			if (c != 0)
				nr_allocated_++;

		} else if (c == 0) {
			if (b < search_start_)
				search_start_ = b;

			nr_allocated_--;
		}
	}

	// Mutator that ignores the old count and yields a fixed value.
	struct override {
		override(ref_t new_value)
			: new_value_(new_value) {
		}

		ref_t operator()(ref_t old) const {
			return new_value_;
		}

		ref_t new_value_;
	};

	void set_count(block_address b, ref_t c) {
		override m(c);
		modify_count(b, m);
	}

	void commit() {
		indexes_->commit_ies();
	}

	static ref_t inc_mutator(ref_t c) {
		return c + 1;
	}

	// NOTE(review): no guard against c == 0; the unsigned result wraps.
	static ref_t dec_mutator(ref_t c) {
		return c - 1;
	}

	void inc(block_address b) {
		// Advance the free-search hint past a block being taken.
		if (b == search_start_)
			search_start_++;

		modify_count(b, inc_mutator);
	}

	void dec(block_address b) {
		modify_count(b, dec_mutator);
	}

	// Returns the first block with a zero count inside the spans produced
	// by |it|, ignoring anything below search_start_; empty if none.
	maybe_block find_free(span_iterator &it) {
		for (maybe_span ms = it.first(); ms; ms = it.next()) {
			block_address begin = ms->first;
			block_address end = ms->second;

			if (end < search_start_)
				continue;

			if (begin < search_start_)
				begin = search_start_;

			block_address begin_index = begin / ENTRIES_PER_BLOCK;
			block_address end_index = div_up<block_address>(end, ENTRIES_PER_BLOCK);

			for (block_address index = begin_index; index < end_index; index++) {
				index_entry ie = indexes_->find_ie(index);
				bitmap bm(tm_, ie, bitmap_validator_);

				unsigned bit_begin = (index == begin_index) ? (begin % ENTRIES_PER_BLOCK) : 0;

				// For the last bitmap the bound is the distance
				// from the start of that bitmap to |end|.  Using
				// end % ENTRIES_PER_BLOCK here would give 0, and
				// skip the whole bitmap, whenever |end| is an
				// exact multiple of ENTRIES_PER_BLOCK.
				unsigned bit_end = (index == end_index - 1) ?
					(end - ENTRIES_PER_BLOCK * index) : ENTRIES_PER_BLOCK;

				boost::optional<unsigned> maybe_b = bm.find_free(bit_begin, bit_end);
				if (maybe_b) {
					block_address b = (index * ENTRIES_PER_BLOCK) + *maybe_b;

					// Leave the hint one block before the
					// block that was found.
					if (b)
						search_start_ = b - 1;

					return b;
				}
			}
		}

		return maybe_block();
	}

	bool count_possibly_greater_than_one(block_address b) const {
		return get_count(b) > 1;
	}

	// Grows the space map by |extra_blocks|, creating a fresh bitmap block
	// and index entry for every bitmap that is newly needed.
	//
	// NOTE(review): if the old size was not a multiple of
	// ENTRIES_PER_BLOCK, the nr_free_ of the previous last index entry is
	// left as it was - confirm whether it should grow.
	virtual void extend(block_address extra_blocks) {
		block_address nr_blocks = nr_blocks_ + extra_blocks;

		block_address bitmap_count = div_up<block_address>(nr_blocks, ENTRIES_PER_BLOCK);
		block_address old_bitmap_count = div_up<block_address>(nr_blocks_, ENTRIES_PER_BLOCK);

		indexes_->resize(bitmap_count);
		for (block_address i = old_bitmap_count; i < bitmap_count; i++) {
			write_ref wr = tm_.new_block(bitmap_validator_);

			index_entry ie;
			ie.blocknr_ = wr.get_location();

			// The final bitmap may be only partly used.
			ie.nr_free_ = i == (bitmap_count - 1) ?
				(nr_blocks - ENTRIES_PER_BLOCK * i) : ENTRIES_PER_BLOCK;
			ie.none_free_before_ = 0;

			indexes_->save_ie(i, ie);
		}

		nr_blocks_ = nr_blocks;
	}

	// Currently does nothing; the body is compiled out.
	virtual void visit(space_map_detail::visitor &v) const {
#if 0
		ref_count_checker rcv(v);
		ref_counts_.visit_depth_first(rcv);

		block_address nr_entries = div_up<block_address>(get_nr_blocks(), ENTRIES_PER_BLOCK);
		indexes_->visit(v, nr_entries);
#endif
	}

	// Wraps another iterator, replacing a bitmap value of 3 with the real
	// count looked up in the ref count tree.
	struct look_aside_iterator : public iterator {
		look_aside_iterator(sm_disk const &smd, iterator &it)
			: smd_(smd),
			  it_(it) {
		}

		virtual void operator () (block_address b, ref_t c) {
			it_(b, c == 3 ? smd_.lookup_ref_count(b) : c);
		}

		sm_disk const &smd_;
		iterator &it_;
	};

	friend struct look_aside_iterator;

	// Calls |it| with the block number and reference count of every block.
	virtual void iterate(iterator &it) const {
		look_aside_iterator wrapper(*this, it);
		unsigned nr_indexes = div_up<block_address>(nr_blocks_, ENTRIES_PER_BLOCK);

		for (unsigned i = 0; i < nr_indexes; i++) {
			// The final bitmap may be only partly used.
			unsigned hi = (i == nr_indexes - 1) ? (nr_blocks_ - ENTRIES_PER_BLOCK * i) : ENTRIES_PER_BLOCK;

			index_entry ie = indexes_->find_ie(i);
			bitmap bm(tm_, ie, bitmap_validator_);
			bm.iterate(i * ENTRIES_PER_BLOCK, hi, wrapper);
		}
	}

	// Increments |bc| for the blocks used by the index store and by the
	// ref count tree.
	virtual void count_metadata(block_counter &bc) const {
		indexes_->count_metadata(bc);

		noop_value_counter<uint32_t> vc;
		count_btree_blocks(ref_counts_, bc, vc);
	}

	virtual size_t root_size() const {
		return sizeof(sm_root_disk);
	}

	// Writes the packed root into |dest|.  Throws runtime_error if |len|
	// is smaller than sizeof(sm_root_disk).
	virtual void copy_root(void *dest, size_t len) const {
		sm_root_disk d;
		sm_root v;

		if (len < sizeof(d))
			throw runtime_error("root too small");

		v.nr_blocks_ = sm_disk::get_nr_blocks();
		v.nr_allocated_ = sm_disk::get_nr_allocated();
		v.bitmap_root_ = get_index_store()->get_root();
		v.ref_count_root_ = sm_disk::get_ref_count_root();

		sm_root_traits::pack(v, d);
		::memcpy(dest, &d, sizeof(d));
	}

	// Returns a new sm_disk built from the current root values and a clone
	// of the index store.
	virtual checked_space_map::ptr clone() const {
		sm_root root;
		root.nr_blocks_ = nr_blocks_;
		root.nr_allocated_ = nr_allocated_;
		root.bitmap_root_ = indexes_->get_root();
		root.ref_count_root_ = ref_counts_.get_root();

		return checked_space_map::ptr(
			new sm_disk(indexes_->clone(), tm_, root));
	}

protected:
	transaction_manager &get_tm() const {
		return tm_;
	}

	block_address get_nr_allocated() const {
		return nr_allocated_;
	}

	block_address get_ref_count_root() const {
		return ref_counts_.get_root();
	}

	index_store::ptr get_index_store() const {
		return indexes_;
	}

private:
	// Throws runtime_error unless b < nr_blocks_.
	void check_block(block_address b) const {
		if (b >= nr_blocks_) {
			std::ostringstream out;
			out << "space map disk: block out of bounds ("
			    << b << " >= " << nr_blocks_ << ")";
			throw std::runtime_error(out.str());
		}
	}

	// Raw 2 bit bitmap value for |b| (3 means "look in the tree").
	ref_t lookup_bitmap(block_address b) const {
		check_block(b);

		index_entry ie = indexes_->find_ie(b / ENTRIES_PER_BLOCK);
		bitmap bm(tm_, ie, bitmap_validator_);
		return bm.lookup(b % ENTRIES_PER_BLOCK);
	}

	// Stores the raw 2 bit value |n| for |b| and saves the index entry.
	void insert_bitmap(block_address b, unsigned n) {
		check_block(b);

		if (n > 3)
			throw runtime_error("bitmap can only hold 2 bit values");

		index_entry ie = indexes_->find_ie(b / ENTRIES_PER_BLOCK);
		bitmap bm(tm_, ie, bitmap_validator_);
		bm.insert(b % ENTRIES_PER_BLOCK, n);
		indexes_->save_ie(b / ENTRIES_PER_BLOCK, bm.get_ie());
	}

	// Count held in the tree for |b|; throws runtime_error if absent.
	ref_t lookup_ref_count(block_address b) const {
		uint64_t key[1] = {b};
		boost::optional<ref_count_traits::value_type> mvalue = ref_counts_.lookup(key);
		if (!mvalue)
			throw runtime_error("ref count not in tree");
		return *mvalue;
	}

	void insert_ref_count(block_address b, ref_t count) {
		uint64_t key[1] = {b};
		ref_counts_.insert(key, count);
	}

	void remove_ref_count(block_address b) {
		uint64_t key[1] = {b};
		ref_counts_.remove(key);
	}

	transaction_manager &tm_;
	bcache::validator::ptr bitmap_validator_;
	index_store::ptr indexes_;
	block_address nr_blocks_;
	block_address nr_allocated_;

	// Hint: find_free() ignores blocks below this.
	block_address search_start_;

	btree<1, ref_count_traits> ref_counts_;
};
//--------------------------------
// Value visitor handed to btree_visit_values() by btree_index_store::visit().
// Holds the index_entry_visitor but does not call it yet.
class ie_value_visitor {
public:
	ie_value_visitor(index_entry_visitor &v)
		: v_(v) {
	}

	virtual void visit(btree_path const &path,
			   sm_disk_detail::index_entry const &ie) {
		// FIXME: finish
	}

private:
	index_entry_visitor &v_;
};
// Damage visitor handed to btree_visit_values() by btree_index_store::visit().
// Holds the index_entry_visitor but does not call it yet.
class ie_damage_visitor {
public:
	ie_damage_visitor(index_entry_visitor &v)
		: v_(v) {
	}

	virtual void visit(btree_path const &path,
			   btree_detail::damage const &d) {
		// FIXME: finish
	}

private:
	index_entry_visitor &v_;
};
class btree_index_store : public index_store {
public:
typedef boost::shared_ptr ptr;
btree_index_store(transaction_manager &tm)
: tm_(tm),
bitmaps_(tm, index_entry_traits::ref_counter()) {
}
btree_index_store(transaction_manager &tm,
block_address root)
: tm_(tm),
bitmaps_(tm, root, index_entry_traits::ref_counter()) {
}
//--------------------------------
struct index_entry_counter {
index_entry_counter(block_counter &bc)
: bc_(bc) {
}
void visit(btree_detail::node_location const &loc, index_entry const &ie) {
if (ie.blocknr_ != 0)
bc_.inc(ie.blocknr_);
}
private:
block_counter &bc_;
};
virtual void count_metadata(block_counter &bc) const {
index_entry_counter vc(bc);
count_btree_blocks(bitmaps_, bc, vc);
}
//--------------------------------
virtual void resize(block_address nr_entries) {
// No op
}
virtual index_entry find_ie(block_address ie_index) const {
uint64_t key[1] = {ie_index};
boost::optional mindex = bitmaps_.lookup(key);
if (!mindex)
throw runtime_error("Couldn't lookup bitmap");
return *mindex;
}
virtual void save_ie(block_address ie_index, struct index_entry ie) {
uint64_t key[1] = {ie_index};
bitmaps_.insert(key, ie);
}
virtual void commit_ies() {
// No op
}
virtual index_store::ptr clone() const {
return index_store::ptr(new btree_index_store(tm_, bitmaps_.get_root()));
}
virtual block_address get_root() const {
return bitmaps_.get_root();
}
virtual void visit(index_entry_visitor &v, block_address nr_index_entries) const {
ie_value_visitor vv(v);
ie_damage_visitor dv(v);
btree_visit_values(bitmaps_, vv, dv);
}
private:
transaction_manager &tm_;
btree<1, index_entry_traits> bitmaps_;
};
class metadata_index_store : public index_store {
public:
typedef boost::shared_ptr ptr;
metadata_index_store(transaction_manager &tm)
: tm_(tm) {
block_manager<>::write_ref wr = tm_.new_block(index_validator());
bitmap_root_ = wr.get_location();
}
metadata_index_store(transaction_manager &tm, block_address root, block_address nr_indexes)
: tm_(tm),
bitmap_root_(root) {
resize(nr_indexes);
load_ies();
}
virtual void count_metadata(block_counter &bc) const {
bc.inc(bitmap_root_);
for (unsigned i = 0; i < entries_.size(); i++) {
block_address b = entries_[i].blocknr_;
if (b != 0)
bc.inc(b);
}
}
virtual void resize(block_address nr_indexes) {
entries_.resize(nr_indexes);
}
virtual index_entry find_ie(block_address ie_index) const {
return entries_[ie_index];
}
virtual void save_ie(block_address ie_index, struct index_entry ie) {
entries_[ie_index] = ie;
}
virtual void commit_ies() {
std::pair::write_ref, bool> p =
tm_.shadow(bitmap_root_, index_validator());
bitmap_root_ = p.first.get_location();
metadata_index *mdi = reinterpret_cast(p.first.data());
for (unsigned i = 0; i < entries_.size(); i++)
index_entry_traits::pack(entries_[i], mdi->index[i]);
}
virtual index_store::ptr clone() const {
return index_store::ptr(new metadata_index_store(tm_, bitmap_root_, entries_.size()));
}
virtual block_address get_root() const {
return bitmap_root_;
}
virtual void visit(index_entry_visitor &vv, block_address nr_index_entries) const {
for (unsigned i = 0; i < entries_.size(); i++)
if (entries_[i].blocknr_ != 0)
vv.visit(entries_[i]);
#if 0
counter.inc(bitmap_root_);
for (unsigned i = 0; i < entries_.size(); i++)
// FIXME: this looks like a hack
if (entries_[i].blocknr_ != 0) // superblock
counter.inc(entries_[i].blocknr_);
#endif
}
private:
void load_ies() {
block_manager<>::read_ref rr =
tm_.read_lock(bitmap_root_, index_validator());
metadata_index const *mdi = reinterpret_cast(rr.data());
for (unsigned i = 0; i < entries_.size(); i++)
index_entry_traits::unpack(*(mdi->index + i), entries_[i]);
}
transaction_manager &tm_;
block_address bitmap_root_;
std::vector entries_;
};
}
//----------------------------------------------------------------
// Creates a btree-indexed disk space map covering |nr_blocks| blocks and
// commits it.
checked_space_map::ptr
persistent_data::create_disk_sm(transaction_manager &tm,
				block_address nr_blocks)
{
	index_store::ptr indexes(new btree_index_store(tm));
	checked_space_map::ptr result(new sm_disk(indexes, tm));

	result->extend(nr_blocks);
	result->commit();

	return result;
}
// Opens a btree-indexed disk space map from the packed root at |root|,
// which must point at at least sizeof(sm_root_disk) bytes.
checked_space_map::ptr
persistent_data::open_disk_sm(transaction_manager &tm, void const *root)
{
	sm_root_disk packed;
	::memcpy(&packed, root, sizeof(packed));

	sm_root unpacked;
	sm_root_traits::unpack(packed, unpacked);

	index_store::ptr indexes(new btree_index_store(tm, unpacked.bitmap_root_));
	checked_space_map::ptr result(new sm_disk(indexes, tm, unpacked));
	return result;
}
// Creates a metadata space map (single index block) covering |nr_blocks|
// blocks, truncated with a warning to MAX_METADATA_BLOCKS, commits it and
// wraps it in the recursive and careful-alloc space maps.
checked_space_map::ptr
persistent_data::create_metadata_sm(transaction_manager &tm, block_address nr_blocks)
{
	index_store::ptr indexes(new metadata_index_store(tm));
	checked_space_map::ptr inner(new sm_disk(indexes, tm));

	bool const too_big = nr_blocks > MAX_METADATA_BLOCKS;
	if (too_big) {
		cerr << "truncating metadata device to " << MAX_METADATA_BLOCKS << " 4k blocks\n";
		nr_blocks = MAX_METADATA_BLOCKS;
	}

	inner->extend(nr_blocks);
	inner->commit();

	checked_space_map::ptr recursive = create_recursive_sm(inner);
	return create_careful_alloc_sm(recursive);
}
// Opens a metadata space map from the packed root at |root| (at least
// sizeof(sm_root_disk) bytes) and wraps it in the recursive and
// careful-alloc space maps.
checked_space_map::ptr
persistent_data::open_metadata_sm(transaction_manager &tm, void const *root)
{
	sm_root_disk packed;
	::memcpy(&packed, root, sizeof(packed));

	sm_root unpacked;
	sm_root_traits::unpack(packed, unpacked);

	// One index entry per bitmap block.
	block_address const nr_indexes =
		div_up<block_address>(unpacked.nr_blocks_, ENTRIES_PER_BLOCK);

	index_store::ptr indexes(
		new metadata_index_store(tm, unpacked.bitmap_root_, nr_indexes));
	checked_space_map::ptr inner(new sm_disk(indexes, tm, unpacked));

	return create_careful_alloc_sm(create_recursive_sm(inner));
}
// Returns a newly allocated validator for space map bitmap blocks.
bcache::validator::ptr
persistent_data::bitmap_validator()
{
	bcache::validator::ptr v(new bitmap_block_validator());
	return v;
}
// Returns a newly allocated validator for the metadata index block.
bcache::validator::ptr
persistent_data::index_validator()
{
	bcache::validator::ptr v(new index_block_validator());
	return v;
}
// Returns the nr_blocks_ field of the packed space map root at |root|, which
// must point at at least sizeof(sm_root_disk) bytes.  |tm| is not used.
block_address
persistent_data::get_nr_blocks_in_data_sm(transaction_manager &tm, void *root)
{
	sm_root_disk packed;
	::memcpy(&packed, root, sizeof(packed));

	sm_root unpacked;
	sm_root_traits::unpack(packed, unpacked);

	return unpacked.nr_blocks_;
}
//----------------------------------------------------------------