955e11bc28
Encapsulate the file descriptor in an object to ensure that the fd is closed properly when an exception is raised, e.g., when the block_cache throws an exception during the block_manager's construction.
302 lines
6.5 KiB
C++
302 lines
6.5 KiB
C++
#ifndef BLOCK_CACHE_H
|
|
#define BLOCK_CACHE_H
|
|
|
|
#include "base/container_of.h"
|
|
#include "base/file_utils.h"
|
|
|
|
#include <boost/intrusive/list.hpp>
|
|
#include <boost/intrusive/set.hpp>
|
|
#include <boost/noncopyable.hpp>
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <functional>
|
|
#include <iostream>
|
|
#include <libaio.h>
|
|
#include <memory>
|
|
#include <stdexcept>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <vector>
|
|
#include <iostream>
|
|
|
|
namespace bi = boost::intrusive;
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
namespace bcache {
|
|
// Index type used to address blocks (see block_cache::get()).
typedef uint64_t block_address;

// Type used for the block_size argument of block_cache.
// NOTE(review): the name suggests the unit is sectors -- confirm.
typedef uint64_t sector_t;
|
|
|
|
class validator {
|
|
public:
|
|
typedef boost::shared_ptr<validator> ptr;
|
|
|
|
virtual ~validator() {}
|
|
|
|
virtual void check(void const *data, block_address location) const = 0;
|
|
virtual bool check_raw(void const *data) const = 0;
|
|
virtual void prepare(void *data, block_address location) const = 0;
|
|
};
|
|
|
|
class noop_validator : public validator {
|
|
public:
|
|
void check(void const *data, block_address location) const {}
|
|
bool check_raw(void const *data) const {return true;}
|
|
void prepare(void *data, block_address location) const {}
|
|
};
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
class block_cache : private boost::noncopyable {
|
|
public:
|
|
enum block_flags {
|
|
BF_IO_PENDING = (1 << 0),
|
|
BF_DIRTY = (1 << 1),
|
|
BF_FLUSH = (1 << 2),
|
|
};
|
|
|
|
class block : private boost::noncopyable {
|
|
public:
|
|
block()
|
|
: v_() {
|
|
}
|
|
|
|
bool operator <(block const &rhs) const {
|
|
return index_ > rhs.index_;
|
|
}
|
|
|
|
bool operator ==(block const &rhs) const {
|
|
return index_ == rhs.index_;
|
|
}
|
|
|
|
// Do not give this class a destructor, it wont get
|
|
// called because we manage allocation ourselves.
|
|
|
|
uint64_t get_index() const {
|
|
return index_;
|
|
}
|
|
|
|
void *get_data() const {
|
|
return data_;
|
|
}
|
|
|
|
void mark_dirty() {
|
|
set_flags(BF_DIRTY);
|
|
}
|
|
|
|
void set_flags(unsigned flags) {
|
|
flags_ |= flags;
|
|
}
|
|
|
|
unsigned test_flags(unsigned flags) const {
|
|
return flags_ & flags;
|
|
}
|
|
|
|
void clear_flags(unsigned flags) {
|
|
flags_ &= ~flags;
|
|
}
|
|
|
|
void get() {
|
|
ref_count_++;
|
|
};
|
|
|
|
void put() {
|
|
if (!ref_count_)
|
|
throw std::runtime_error("bad put");
|
|
|
|
if (!--ref_count_)
|
|
bc_->release(*this);
|
|
}
|
|
|
|
void unlink_set() {
|
|
set_hook_.unlink();
|
|
}
|
|
|
|
void unlink() {
|
|
list_hook_.unlink();
|
|
}
|
|
|
|
private:
|
|
friend class block_cache;
|
|
friend class cmp_index;
|
|
|
|
block_cache *bc_;
|
|
|
|
uint64_t index_;
|
|
void *data_;
|
|
|
|
bi::list_member_hook<bi::link_mode<bi::auto_unlink>> list_hook_;
|
|
bi::set_member_hook<bi::link_mode<bi::auto_unlink>> set_hook_;
|
|
|
|
unsigned ref_count_;
|
|
|
|
int error_;
|
|
unsigned flags_;
|
|
|
|
iocb control_block_;
|
|
validator::ptr v_;
|
|
};
|
|
|
|
struct cmp_index {
|
|
bool operator()(block_address index, block const &b) const {
|
|
return index > b.index_;
|
|
}
|
|
|
|
bool operator()(block const &b, block_address index) const {
|
|
return b.index_ > index;
|
|
}
|
|
};
|
|
|
|
class auto_block {
|
|
public:
|
|
auto_block()
|
|
: b_(0) {
|
|
}
|
|
|
|
auto_block(block &b)
|
|
: b_(&b) {
|
|
}
|
|
|
|
~auto_block() {
|
|
put();
|
|
}
|
|
|
|
auto_block &operator =(block &b) {
|
|
put();
|
|
b_ = &b;
|
|
return *this;
|
|
}
|
|
|
|
void *get_data() const {
|
|
if (b_)
|
|
return b_->get_data();
|
|
|
|
throw std::runtime_error("auto_block not set");
|
|
}
|
|
|
|
private:
|
|
void put() {
|
|
if (b_) {
|
|
b_->put();
|
|
b_ = 0;
|
|
}
|
|
}
|
|
|
|
block *b_;
|
|
};
|
|
|
|
//--------------------------------
|
|
|
|
block_cache(file_utils::file_descriptor &fd, sector_t block_size,
|
|
uint64_t max_nr_blocks, size_t mem);
|
|
~block_cache();
|
|
|
|
uint64_t get_nr_blocks() const;
|
|
uint64_t get_nr_locked() const;
|
|
|
|
enum get_flags {
|
|
GF_ZERO = (1 << 0),
|
|
GF_DIRTY = (1 << 1),
|
|
GF_BARRIER = (1 << 2)
|
|
};
|
|
|
|
block_cache::block &get(block_address index, unsigned flags, validator::ptr v);
|
|
|
|
/*
|
|
* Flush can fail if an earlier write failed. You do not know which block
|
|
* failed. Make sure you build your recovery with this in mind.
|
|
*/
|
|
int flush();
|
|
void prefetch(block_address index);
|
|
|
|
private:
|
|
typedef bi::member_hook<block,
|
|
bi::list_member_hook<bi::link_mode<bi::auto_unlink>>,
|
|
&block::list_hook_> list_hook_option;
|
|
typedef bi::list<block, list_hook_option,
|
|
bi::constant_time_size<false>> block_list;
|
|
|
|
int init_free_list(unsigned count);
|
|
block *__alloc_block();
|
|
void complete_io(block &b, int result);
|
|
void issue_low_level(block &b, enum io_iocb_cmd opcode, const char *desc);
|
|
void issue_read(block &b);
|
|
void issue_write(block &b);
|
|
void wait_io();
|
|
void unlink_block(block &b);
|
|
void link_block(block &b);
|
|
void relink(block &b);
|
|
void wait_all();
|
|
void wait_specific(block &b);
|
|
unsigned writeback(unsigned count);
|
|
void setup_control_block(block &b);
|
|
block *find_unused_clean_block();
|
|
block *new_block(block_address index);
|
|
void mark_dirty(block &b);
|
|
unsigned calc_nr_cache_blocks(size_t mem, sector_t block_size);
|
|
unsigned calc_nr_buckets(unsigned nr_blocks);
|
|
void zero_block(block &b);
|
|
block *lookup_or_read_block(block_address index, unsigned flags, validator::ptr v);
|
|
void exit_free_list();
|
|
|
|
void preemptive_writeback();
|
|
bool maybe_flush(block_cache::block &b);
|
|
void release(block_cache::block &block);
|
|
void check_index(block_address index) const;
|
|
|
|
void hit(block &b, unsigned flags);
|
|
void miss(unsigned flags);
|
|
|
|
//--------------------------------
|
|
|
|
file_utils::file_descriptor &fd_;
|
|
sector_t block_size_;
|
|
uint64_t nr_data_blocks_;
|
|
uint64_t nr_cache_blocks_;
|
|
|
|
std::unique_ptr<std::vector<block>> blocks_memory_;
|
|
unsigned char *blocks_data_;
|
|
|
|
io_context_t aio_context_;
|
|
std::vector<io_event> events_;
|
|
|
|
/*
|
|
* Blocks on the free list are not initialised, apart from the
|
|
* b.data field.
|
|
*/
|
|
block_list free_;
|
|
block_list errored_;
|
|
block_list dirty_;
|
|
block_list clean_;
|
|
|
|
// Because the block_list type doesn't have a constant time
|
|
// size() method, we have to manually keep track of the list
|
|
// sizes (tedious and error prone).
|
|
unsigned nr_locked_;
|
|
unsigned nr_dirty_;
|
|
|
|
unsigned nr_io_pending_;
|
|
block_list io_pending_;
|
|
|
|
typedef bi::member_hook<block,
|
|
bi::set_member_hook<bi::link_mode<bi::auto_unlink>>,
|
|
&block::set_hook_> block_option;
|
|
typedef bi::set<block, block_option,
|
|
bi::constant_time_size<false>> block_set;
|
|
block_set block_set_;
|
|
|
|
// Stats
|
|
unsigned read_hits_;
|
|
unsigned read_misses_;
|
|
unsigned write_zeroes_;
|
|
unsigned write_hits_;
|
|
unsigned write_misses_;
|
|
unsigned prefetches_;
|
|
|
|
validator::ptr noop_validator_;
|
|
};
|
|
}
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
#endif
|