commit 840be1b6c9 (parent ab6d31f136)
Author: Joe Thornber
Date:   2014-07-28 14:13:28 +01:00

4 changed files with 155 additions and 203 deletions

View File

@@ -11,6 +11,7 @@
 #include <iostream>
 #include <stdexcept>
+#include <sstream>
 //----------------------------------------------------------------
@@ -126,14 +127,14 @@ namespace bcache {
 	block_cache::complete_io(block &b, int result)
 	{
 		b.error_ = result;
-		clear_flags(b, IO_PENDING);
+		b.clear_flags(BF_IO_PENDING);
 		nr_io_pending_--;
 		if (b.error_)
 			list_move_tail(&b.list_, &errored_);
 		else {
-			if (test_flags(b, DIRTY)) {
-				clear_flags(b, DIRTY);
+			if (b.test_flags(BF_DIRTY)) {
+				b.clear_flags(BF_DIRTY | BF_PREVIOUSLY_DIRTY);
 				nr_dirty_--;
 			}
@@ -152,8 +153,8 @@ namespace bcache {
 		iocb *control_blocks[1];
 		// FIXME: put this back in
-		assert(!test_flags(b, IO_PENDING));
-		set_flags(b, IO_PENDING);
+		assert(!b.test_flags(BF_IO_PENDING));
+		b.set_flags(BF_IO_PENDING);
 		nr_io_pending_++;
 		list_move_tail(&b.list_, &io_pending_);
@@ -177,14 +178,14 @@ namespace bcache {
 	int
 	block_cache::issue_read(block &b)
 	{
-		assert(!test_flags(b, IO_PENDING));
+		assert(!b.test_flags(BF_IO_PENDING));
 		return issue_low_level(b, IO_CMD_PREAD, "read");
 	}
 	int
 	block_cache::issue_write(block &b)
 	{
-		assert(!test_flags(b, IO_PENDING));
+		assert(!b.test_flags(BF_IO_PENDING));
 		b.v_->prepare(b.data_, b.index_);
 		return issue_low_level(b, IO_CMD_PWRITE, "write");
 	}
@@ -213,7 +214,8 @@ namespace bcache {
 				complete_io(*b, e.res);
 			else
-				info("incomplete io, unexpected: %d\n", r);
+				info("incomplete io for block %llu, unexpected: %d\n",
+				     b->index_, e.res);
 		}
 	}
@@ -231,7 +233,7 @@ namespace bcache {
 		if (b.error_)
 			return &errored_;
-		return (b.flags_ & DIRTY) ? &dirty_ : &clean_;
+		return b.test_flags(BF_DIRTY) ? &dirty_ : &clean_;
 	}
 	void
@@ -253,7 +255,7 @@ namespace bcache {
 	void
 	block_cache::wait_specific(block &b)
 	{
-		while (test_flags(b, IO_PENDING))
+		while (b.test_flags(BF_IO_PENDING))
 			wait_io();
 	}
@@ -262,12 +264,16 @@ namespace bcache {
 	{
 		int r;
 		block *b, *tmp;
-		unsigned actual = 0;
+		unsigned actual = 0, dirty_length = 0;
 		list_for_each_entry_safe (b, tmp, &dirty_, list_) {
+			dirty_length++;
 			if (actual == count)
 				break;
+			// The block may be on the dirty list from a prior
+			// acquisition.
 			if (b->ref_count_)
 				continue;
@@ -276,7 +282,7 @@ namespace bcache {
 			actual++;
 		}
-		info("writeback: requested %u, actual %u\n", count, actual);
+		info("writeback: requested %u, actual %u, dirty length %u\n", count, actual, dirty_length);
 		return actual;
 	}
@@ -377,7 +383,7 @@ namespace bcache {
 			b->ref_count_ = 0;
 			b->error_ = 0;
-			clear_flags(*b, IO_PENDING | DIRTY);
+			b->flags_ = 0;
 			b->index_ = index;
 			setup_control_block(*b);
@@ -391,16 +397,6 @@ namespace bcache {
 	/*----------------------------------------------------------------
 	 * Block reference counting
 	 *--------------------------------------------------------------*/
-	void
-	block_cache::mark_dirty(block &b)
-	{
-		if (!test_flags(b, DIRTY)) {
-			set_flags(b, DIRTY);
-			list_move_tail(&b.list_, &dirty_);
-			nr_dirty_++;
-		}
-	}
 	unsigned
 	block_cache::calc_nr_cache_blocks(size_t mem, sector_t block_size)
 	{
@@ -451,8 +447,11 @@ namespace bcache {
 		aio_context_ = 0; /* needed or io_setup will fail */
 		r = io_setup(nr_cache_blocks, &aio_context_);
-		if (r < 0)
+		if (r < 0) {
+			std::cerr << "r = " << r << "\n";
+			perror("io_setup failed");
 			throw std::runtime_error("io_setup failed");
+		}
 		hash_init(nr_buckets);
 		INIT_LIST_HEAD(&free_);
@@ -485,7 +484,7 @@ namespace bcache {
 	block_cache::zero_block(block &b)
 	{
 		memset(b.data_, 0, block_size_ << SECTOR_SHIFT);
-		mark_dirty(b);
+		b.mark_dirty();
 	}
 	block_cache::block *
@@ -495,7 +494,7 @@ namespace bcache {
 		block *b = hash_lookup(index);
 		if (b) {
-			if (test_flags(*b, IO_PENDING))
+			if (b->test_flags(BF_IO_PENDING))
 				wait_specific(*b);
 			if (flags & GF_ZERO)
@@ -503,24 +502,22 @@
 			else {
 				if (b->v_.get() &&
 				    b->v_.get() != v.get() &&
-				    test_flags(*b, DIRTY))
+				    b->test_flags(BF_DIRTY))
 					b->v_->prepare(b->data_, b->index_);
 			}
 			b->v_ = v;
 		} else {
-			if (flags & GF_CAN_BLOCK) {
-				b = new_block(index);
-				if (b) {
-					b->v_ = v;
+			b = new_block(index);
+			if (b) {
+				b->v_ = v;
-					if (flags & GF_ZERO)
-						zero_block(*b);
-					else {
-						issue_read(*b);
-						wait_specific(*b);
-						v->check(b->data_, b->index_);
-					}
+				if (flags & GF_ZERO)
+					zero_block(*b);
+				else {
+					issue_read(*b);
+					wait_specific(*b);
+					v->check(b->data_, b->index_);
+				}
-				}
 			}
@@ -531,12 +528,23 @@
 	block_cache::block &
 	block_cache::get(block_address index, unsigned flags, validator::ptr v)
 	{
+		check_index(index);
 		block *b = lookup_or_read_block(index, flags, v);
 		if (b) {
+			if (b->ref_count_)
+				throw std::runtime_error("block already locked");
 			hit(*b);
 			b->ref_count_++;
+			if (flags & GF_BARRIER)
+				b->set_flags(BF_FLUSH);
+			if (flags & GF_DIRTY)
+				b->set_flags(BF_DIRTY);
 			return *b;
 		}
@@ -544,20 +552,39 @@
 	}
 	void
-	block_cache::put(block_cache::block &b, unsigned flags)
+	block_cache::preemptive_writeback()
 	{
-		if (b.ref_count_ == 0)
-			throw std::runtime_error("bad put");
+		unsigned nr_available = nr_cache_blocks_ - (nr_dirty_ - nr_io_pending_);
+		if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * nr_cache_blocks_ / 100))
+			writeback((WRITEBACK_HIGH_THRESHOLD_PERCENT * nr_cache_blocks_ / 100) - nr_available);
-		b.ref_count_--;
+	}
-		if (flags & PF_DIRTY) {
-			mark_dirty(b);
+	void
+	block_cache::release(block_cache::block &b)
+	{
+		assert(!b.ref_count_);
 		// FIXME: factor out
 		unsigned nr_available = nr_cache_blocks_ - (nr_dirty_ - nr_io_pending_);
 		if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * nr_cache_blocks_ / 100))
 			writeback((WRITEBACK_HIGH_THRESHOLD_PERCENT * nr_cache_blocks_ / 100) - nr_available);
+#if 0
+		if (b.test_flags(BF_FLUSH))
+			flush();
+#endif
+		if (b.test_flags(BF_DIRTY)) {
+			if (!b.test_flags(BF_PREVIOUSLY_DIRTY)) {
+				list_move_tail(&b.list_, &dirty_);
+				nr_dirty_++;
+				b.set_flags(BF_PREVIOUSLY_DIRTY);
+			}
+#if 0
+			if (b.test_flags(BF_FLUSH))
+				flush();
+			else
+#endif
+			preemptive_writeback();
+			b.clear_flags(BF_FLUSH);
+		}
 	}
@@ -567,7 +594,7 @@
 		block *b, *tmp;
 		list_for_each_entry_safe (b, tmp, &dirty_, list_) {
-			if (b->ref_count_ || test_flags(*b, IO_PENDING))
+			if (b->ref_count_ || b->test_flags(BF_IO_PENDING))
 				// The superblock may well be still locked.
 				continue;
@@ -582,6 +609,8 @@
 	void
 	block_cache::prefetch(block_address index)
 	{
+		check_index(index);
 		block *b = hash_lookup(index);
 		if (!b) {
@@ -591,24 +620,15 @@
 		}
 	}
 	//--------------------------------
-	unsigned
-	block_cache::test_flags(block &b, unsigned flags)
-	{
-		return b.flags_ & flags;
-	}
 	void
-	block_cache::clear_flags(block &b, unsigned flags)
+	block_cache::check_index(block_address index) const
 	{
-		b.flags_ &= ~flags;
-	}
-	void
-	block_cache::set_flags(block &b, unsigned flags)
-	{
-		b.flags_ |= flags;
+		if (index >= nr_data_blocks_) {
+			std::ostringstream out;
+			out << "block out of bounds ("
+			    << index << " >= " << nr_data_blocks_ << ")\n";
+			throw std::runtime_error(out.str());
+		}
 	}
 }

View File

@@ -6,6 +6,7 @@
 #include <boost/shared_ptr.hpp>
 #include <boost/noncopyable.hpp>
+#include <stdexcept>
 #include <libaio.h>
 #include <memory>
 #include <stdint.h>
@@ -36,12 +37,13 @@ namespace bcache {
 	//----------------------------------------------------------------
 	// FIXME: throw exceptions rather than returning errors
 	class block_cache : private boost::noncopyable {
 	public:
 		enum block_flags {
-			IO_PENDING = (1 << 0),
-			DIRTY = (1 << 1)
+			BF_IO_PENDING = (1 << 0),
+			BF_DIRTY = (1 << 1),
+			BF_FLUSH = (1 << 2),
+			BF_PREVIOUSLY_DIRTY = (1 << 3)
 		};
 		class block : private boost::noncopyable {
@@ -58,16 +60,49 @@ namespace bcache {
 				return data_;
 			}
+			void mark_dirty() {
+				flags_ |= BF_DIRTY;
+			}
+			void mark_flush() {
+				flags_ |= BF_FLUSH;
+			}
+			void set_flags(unsigned flags) {
+				flags_ |= flags;
+			}
+			unsigned test_flags(unsigned flags) const {
+				return flags_ & flags;
+			}
+			void clear_flags(unsigned flags) {
+				flags_ &= ~flags;
+			}
+			void get() {
+				ref_count_++;
+			};
+			void put() {
+				if (!ref_count_)
+					throw std::runtime_error("bad put");
+				if (!--ref_count_)
+					bc_->release(*this);
+			}
 		private:
 			friend class block_cache;
+			block_cache *bc_;
 			uint64_t index_;
 			void *data_;
 			list_head list_;
 			list_head hash_list_;
-			block_cache *bc_;
 			unsigned ref_count_;
 			int error_;
@@ -87,18 +122,12 @@ namespace bcache {
 		enum get_flags {
 			GF_ZERO = (1 << 0),
-			GF_CAN_BLOCK = (1 << 1)
+			GF_DIRTY = (1 << 1),
+			GF_BARRIER = (1 << 1)
 		};
-		// FIXME: what if !GF_CAN_BLOCK?
 		block_cache::block &get(block_address index, unsigned flags, validator::ptr v);
-		enum put_flags {
-			PF_DIRTY = (1 << 0),
-		};
-		void put(block_cache::block &block, unsigned flags);
 		/*
 		 * Flush can fail if an earlier write failed. You do not know which block
 		 * failed. Make sure you build your recovery with this in mind.
@@ -131,9 +160,10 @@ namespace bcache {
 		unsigned calc_nr_buckets(unsigned nr_blocks);
 		void zero_block(block &b);
 		block *lookup_or_read_block(block_address index, unsigned flags, validator::ptr v);
-		unsigned test_flags(block &b, unsigned flags);
-		void clear_flags(block &b, unsigned flags);
-		void set_flags(block &b, unsigned flags);
+		void preemptive_writeback();
+		void release(block_cache::block &block);
+		void check_index(block_address index) const;
 		//--------------------------------
@@ -169,60 +199,6 @@ namespace bcache {
 		unsigned mask_;
 		std::vector<list_head> buckets_;
 	};
-#if 0
-	class auto_lock {
-	public:
-		auto_lock(block_cache &bc, block_address index, bool zero, validator::ptr v, unsigned put_flags)
-			: bc_(bc),
-			  b_(bc.get(index, (zero ? block_cache::GF_ZERO : 0) | block_cache::GF_CAN_BLOCK, v)),
-			  put_flags_(put_flags),
-			  holders_(new unsigned) {
-			*holders_ = 1;
-		}
-		virtual ~auto_lock() {
-			bc_.put(b_, put_flags_);
-		}
-		auto_lock operator =(auto_lock const &rhs) {
-			if (this != &rhs) {
-				bc_ = rhs.bc_;
-		void const *data() const {
-			return b_.get_data();
-		}
-	private:
-		block_cache &bc_;
-		block_cache::block &b_;
-		unsigned put_flags_;
-		unsigned *holders_;
-	};
-	class auto_read_lock : public auto_lock {
-	public:
-		auto_read_lock(block_cache &bc, block_address index, bool zero, validator::ptr v)
-			: auto_lock(bc, index, zero, v, 0) {
-		}
-		using auto_lock::data();
-	};
-	class auto_write_lock : public auto_lock {
-	public:
-		auto_write_lock(block_cache &bc, block_address index, bool zero, validator::ptr v)
-			: auto_lock(bc, index, zero, v, block_cache::DIRTY) {
-		}
-		using auto_lock::data();
-		void *data() {
-			return b_.get_data();
-		}
-	};
-#endif
 }
 //----------------------------------------------------------------

View File

@@ -55,15 +55,11 @@ namespace persistent_data {
 			      unsigned max_concurrent_locks,
 			      mode m);
-		typedef void (*put_behaviour_fn)(block_cache &, block_cache::block &);
 		class read_ref {
 		public:
 			static uint32_t const BLOCK_SIZE = BlockSize;
-			read_ref(block_cache &bc,
-				 block_cache::block &b,
-				 put_behaviour_fn fn);
+			read_ref(block_cache::block &b);
 			read_ref(read_ref const &rhs);
 			virtual ~read_ref();
@@ -74,19 +70,14 @@ namespace persistent_data {
 			void const *data() const;
 		protected:
-			block_cache &bc_;
 			block_cache::block &b_;
-			put_behaviour_fn fn_;
-			unsigned *holders_;
 		};
 		// Inherited from read_ref, since you can read a block that's write
 		// locked.
 		class write_ref : public read_ref {
 		public:
-			write_ref(block_cache &bc,
-				  block_cache::block &b,
-				  put_behaviour_fn fn);
+			write_ref(block_cache::block &b);
 			using read_ref::data;
 			void *data();
@@ -94,9 +85,7 @@ namespace persistent_data {
 		class super_ref : public write_ref {
 		public:
-			super_ref(block_cache &bc,
-				  block_cache::block &b,
-				  put_behaviour_fn fn);
+			super_ref(block_cache::block &b);
 			using read_ref::data;
 			using write_ref::data;

View File

@@ -104,50 +104,23 @@ namespace {
 	};
 namespace persistent_data {
-	inline void read_put(block_cache &bc, block_cache::block &b) {
-		bc.put(b, 0);
-	}
-	inline void write_put(block_cache &bc, block_cache::block &b) {
-		bc.put(b, block_cache::PF_DIRTY);
-	}
-	inline void super_put(block_cache &bc, block_cache::block &b) {
-		bc.flush();
-		bc.put(b, block_cache::PF_DIRTY);
-		bc.flush();
-	}
 	template <uint32_t BlockSize>
-	block_manager<BlockSize>::read_ref::read_ref(block_cache &bc,
-						     block_cache::block &b,
-						     put_behaviour_fn fn)
-		: bc_(bc),
-		  b_(b),
-		  fn_(fn),
-		  holders_(new unsigned)
+	block_manager<BlockSize>::read_ref::read_ref(block_cache::block &b)
+		: b_(b)
 	{
-		*holders_ = 1;
 	}
 	template <uint32_t BlockSize>
 	block_manager<BlockSize>::read_ref::read_ref(read_ref const &rhs)
-		: bc_(rhs.bc_),
-		  b_(rhs.b_),
-		  fn_(rhs.fn_),
-		  holders_(rhs.holders_)
+		: b_(rhs.b_)
 	{
-		(*holders_)++;
+		b_.get();
 	}
 	template <uint32_t BlockSize>
 	block_manager<BlockSize>::read_ref::~read_ref()
 	{
-		if (!--(*holders_)) {
-			fn_(bc_, b_);
-			delete holders_;
-		}
+		b_.put();
 	}
 	template <uint32_t BlockSize>
@@ -155,11 +128,8 @@ namespace persistent_data {
 	block_manager<BlockSize>::read_ref::operator =(read_ref const &rhs)
 	{
 		if (this != &rhs) {
-			bc_ = rhs.bc_;
 			b_ = rhs.b_;
-			fn_ = rhs.fn_;
-			holders_ = rhs.holders_;
-			(*holders_)++;
+			b_.get();
 		}
 		return *this;
@@ -182,10 +152,8 @@ namespace persistent_data {
 	//--------------------------------
 	template <uint32_t BlockSize>
-	block_manager<BlockSize>::write_ref::write_ref(block_cache &bc,
-						       block_cache::block &b,
-						       put_behaviour_fn fn)
-		: read_ref(bc, b, fn)
+	block_manager<BlockSize>::write_ref::write_ref(block_cache::block &b)
+		: read_ref(b)
 	{
 	}
@@ -199,10 +167,9 @@ namespace persistent_data {
 	//--------------------------------
 	template <uint32_t BlockSize>
-	block_manager<BlockSize>::super_ref::super_ref(block_cache &bc,
-						       block_cache::block &b,
-						       put_behaviour_fn fn)
-		: write_ref(bc, b, fn) {
+	block_manager<BlockSize>::super_ref::super_ref(block_cache::block &b)
+		: write_ref(b)
+	{
 	}
 	//----------------------------------------------------------------
@@ -213,7 +180,7 @@ namespace persistent_data {
 					   unsigned max_concurrent_blocks,
 					   mode m)
 		: fd_(open_block_file(path, nr_blocks * BlockSize, m == READ_WRITE)),
-		  bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 256)
+		  bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 16)
 	{
 	}
@@ -222,8 +189,8 @@ namespace persistent_data {
 	block_manager<BlockSize>::read_lock(block_address location,
 					    typename bcache::validator::ptr v) const
 	{
-		block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v);
-		return read_ref(bc_, b, read_put);
+		block_cache::block &b = bc_.get(location, 0, v);
+		return read_ref(b);
 	}
 	template <uint32_t BlockSize>
@@ -231,8 +198,8 @@ namespace persistent_data {
 	block_manager<BlockSize>::write_lock(block_address location,
 					     typename bcache::validator::ptr v)
 	{
-		block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v);
-		return write_ref(bc_, b, write_put);
+		block_cache::block &b = bc_.get(location, block_cache::GF_DIRTY, v);
+		return write_ref(b);
 	}
 	template <uint32_t BlockSize>
@@ -240,8 +207,8 @@ namespace persistent_data {
 	block_manager<BlockSize>::write_lock_zero(block_address location,
 						  typename bcache::validator::ptr v)
 	{
-		block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK | block_cache::GF_ZERO, v);
-		return write_ref(bc_, b, write_put);
+		block_cache::block &b = bc_.get(location, block_cache::GF_ZERO, v);
+		return write_ref(b);
 	}
 	template <uint32_t BlockSize>
@@ -249,8 +216,8 @@ namespace persistent_data {
 	block_manager<BlockSize>::superblock(block_address location,
 					     typename bcache::validator::ptr v)
 	{
-		block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v);
-		return super_ref(bc_, b, super_put);
+		block_cache::block &b = bc_.get(location, block_cache::GF_BARRIER, v);
+		return super_ref(b);
 	}
 	template <uint32_t BlockSize>
@@ -258,8 +225,8 @@ namespace persistent_data {
 	block_manager<BlockSize>::superblock_zero(block_address location,
 						  typename bcache::validator::ptr v)
 	{
-		block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK | block_cache::GF_ZERO, v);
-		return super_ref(bc_, b, super_put);
+		block_cache::block &b = bc_.get(location, block_cache::GF_ZERO | block_cache::GF_BARRIER, v);
+		return super_ref(b);
 	}
 	template <uint32_t BlockSize>
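
For orientation, a minimal usage sketch of the reference-counted interface this commit moves block_manager onto. It is not part of the commit: the helper name, the bc/v arguments and the block-size parameter are illustrative only, error handling is omitted, and the block_cache header changed above is assumed to be included.

	// Sketch only: `bc` and `v` come from elsewhere; block_size_bytes
	// stands in for the cache's block size in bytes.
	#include <cstring>
	#include <cstddef>
	#include <stdint.h>

	void zero_one_block(bcache::block_cache &bc, uint64_t index,
			    bcache::validator::ptr v, std::size_t block_size_bytes)
	{
		// get() now takes the write intent up front (GF_DIRTY here,
		// GF_BARRIER for a superblock) and returns a block whose
		// reference count is already held.
		bcache::block_cache::block &b =
			bc.get(index, bcache::block_cache::GF_DIRTY, v);

		std::memset(b.get_data(), 0, block_size_bytes);

		// put() now lives on the block itself; when the count drops to
		// zero the cache's release() path decides about write-back
		// (see preemptive_writeback() above).
		b.put();
	}

The old pattern of bc.put(b, PF_DIRTY) at unlock time is gone: dirtiness (or a barrier) is declared when the block is acquired, which is what lets release() run without a flags argument.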