[thin_journal_check] Checks journal of block manager activity.

You need to apply doc/bm-journal.patch to create the journal.

thin_journal_check confirms that, had the machine crashed at any time
during the test run, no metadata corruption would have occurred.
This commit is contained in:
Joe Thornber 2018-09-24 14:51:46 +01:00
parent 70cdfe12a2
commit de7c9a5781
6 changed files with 1761 additions and 94 deletions

1
bin/thin_journal_check Symbolic link
View File

@ -0,0 +1 @@
pdata_tools

1505
doc/bm-journal.patch Normal file

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,7 @@ thin_provisioning::register_thin_commands(base::application &app)
app.add_cmd(command::ptr(new thin_generate_metadata_cmd()));
app.add_cmd(command::ptr(new thin_show_duplicates_cmd()));
app.add_cmd(command::ptr(new thin_show_metadata_cmd()));
app.add_cmd(command::ptr(new thin_journal_cmd()));
#endif
}

View File

@ -38,17 +38,22 @@ byte_stream::read_bytes(uint8_t *b, uint8_t *e)
{
while (b != e)
b += read_some_(b, e);
assert(b == e);
}
// Steps the stream on to the following journal block and rewinds the
// intra-block cursor so that reading resumes at offset zero of that block.
void
byte_stream::next_block_()
{
	cursor_ = 0;
	++current_block_;
}
size_t
byte_stream::read_some_(uint8_t *b, uint8_t *e)
{
assert(cursor_ <= JOURNAL_BLOCK_SIZE);
if (cursor_ == JOURNAL_BLOCK_SIZE)
next_block_();
@ -69,6 +74,29 @@ journal_msg::journal_msg(bool success)
{
}
// Builds an open-journal message.  The journal_msg base is always
// constructed with success == true; the only payload carried is the number
// of metadata blocks passed in by the caller.
open_journal_msg::open_journal_msg(uint64_t nr_metadata_blocks)
: journal_msg(true),
nr_metadata_blocks_(nr_metadata_blocks)
{
}
// Double dispatch: hands this message to the visitor's overload for
// open_journal_msg.
void
open_journal_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
// Builds a close-journal message.  It carries no payload and the
// journal_msg base is always constructed with success == true.
close_journal_msg::close_journal_msg()
: journal_msg(true)
{
}
// Double dispatch: hands this message to the visitor's overload for
// close_journal_msg.
void
close_journal_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
block_msg::block_msg(bool success, uint64_t index)
: journal_msg(success), index_(index)
{
@ -228,9 +256,18 @@ journal::read_one_(struct journal_visitor &v)
uint8_t header = read_<uint8_t>();
uint8_t t = header >> 1;
uint8_t success = header & 0x1;
uint64_t index;
uint64_t index, nr_blocks;
switch (static_cast<msg_type>(t)) {
case MT_OPEN_JOURNAL:
nr_blocks = read_<uint64_t>();
v.visit(open_journal_msg(nr_blocks));
break;
case MT_CLOSE_JOURNAL:
v.visit(close_journal_msg());
return false;
case MT_READ_LOCK:
index = read_<uint64_t>();
v.visit(read_lock_msg(success, index));
@ -273,6 +310,7 @@ journal::read_one_(struct journal_visitor &v)
break;
case MT_FLUSH_AND_UNLOCK: {
cerr << "reading flush_and_unlock msg\n";
index = read_<uint64_t>();
auto deltas = read_deltas_();
v.visit(flush_and_unlock_msg(success, index, deltas));
@ -291,9 +329,6 @@ journal::read_one_(struct journal_visitor &v)
case MT_SET_READ_WRITE:
v.visit(set_read_write_msg());
break;
case MT_END_OF_JOURNAL:
return false;
}
return true;
@ -302,14 +337,16 @@ journal::read_one_(struct journal_visitor &v)
bool
journal::read_delta_(delta_list &ds)
{
uint8_t chunk = read_<uint8_t>();
uint16_t chunk = read_<uint16_t>();
if (chunk == 0xff)
if (chunk == 0xffff)
return false;
assert(chunk < JOURNAL_NR_CHUNKS);
auto bytes = vector<uint8_t>(JOURNAL_CHUNK_SIZE, 0);
in_.read_bytes(bytes.data(), bytes.data() + JOURNAL_CHUNK_SIZE);
ds.push_back(delta(chunk, bytes));
ds.push_back(delta(chunk * JOURNAL_CHUNK_SIZE, bytes));
return true;
}

View File

@ -27,8 +27,8 @@
namespace thin_provisioning {
uint32_t const JOURNAL_BLOCK_SIZE = 256 * 1024;
uint32_t const JOURNAL_NR_CHUNKS = 32;
uint32_t const JOURNAL_CHUNK_SIZE = 4096 / JOURNAL_NR_CHUNKS;
uint32_t const JOURNAL_CHUNK_SIZE = 32;
uint32_t const JOURNAL_NR_CHUNKS = (4096 / JOURNAL_CHUNK_SIZE);
class byte_stream {
public:
@ -59,6 +59,17 @@ namespace thin_provisioning {
bool success_;
};
// Message decoded from an MT_OPEN_JOURNAL record.
struct open_journal_msg : public journal_msg {
open_journal_msg(uint64_t nr_metadata_blocks);
// Dispatches to journal_visitor::visit(open_journal_msg const &).
virtual void visit(journal_visitor &v) const;
// Value read from the record's 64-bit payload; the checker uses it as the
// block count when it creates its scratch block manager.
uint64_t nr_metadata_blocks_;
};
// Message decoded from an MT_CLOSE_JOURNAL record.  It has no payload;
// journal::read_one_ returns false right after dispatching it.
struct close_journal_msg : public journal_msg {
close_journal_msg();
// Dispatches to journal_visitor::visit(close_journal_msg const &).
virtual void visit(journal_visitor &v) const;
};
struct block_msg : public journal_msg {
block_msg(bool success, uint64_t index);
uint64_t index_;
@ -148,6 +159,8 @@ namespace thin_provisioning {
msg.visit(*this);
}
virtual void visit(open_journal_msg const &msg) = 0;
virtual void visit(close_journal_msg const &msg) = 0;
virtual void visit(read_lock_msg const &msg) = 0;
virtual void visit(write_lock_msg const &msg) = 0;
virtual void visit(zero_lock_msg const &msg) = 0;
@ -163,7 +176,10 @@ namespace thin_provisioning {
};
enum msg_type {
MT_READ_LOCK = 0,
MT_OPEN_JOURNAL,
MT_CLOSE_JOURNAL,
MT_READ_LOCK,
MT_WRITE_LOCK,
MT_ZERO_LOCK,
MT_TRY_READ_LOCK,
@ -175,7 +191,6 @@ namespace thin_provisioning {
MT_PREFETCH,
MT_SET_READ_ONLY,
MT_SET_READ_WRITE,
MT_END_OF_JOURNAL,
};
class journal {

View File

@ -54,6 +54,97 @@ using namespace thin_provisioning;
//----------------------------------------------------------------
namespace {
// Visitor decorator: writes a one-line description of every journal message
// to stdout and then passes the message, untouched, to the wrapped visitor.
// Messages that carry a block index are only printed when interesting()
// accepts that index; they are forwarded regardless.
class journal_display : public journal_visitor {
public:
	journal_display(journal_visitor &inner)
		: inner_(inner) {
	}

	// Messages without a block index.
	virtual void visit(open_journal_msg const &msg) {
		echo_and_forward_("open_journal\n", msg);
	}

	virtual void visit(close_journal_msg const &msg) {
		echo_and_forward_("close_journal\n", msg);
	}

	virtual void visit(flush_msg const &msg) {
		echo_and_forward_("flush\n", msg);
	}

	virtual void visit(set_read_only_msg const &msg) {
		echo_and_forward_("set_read_only\n", msg);
	}

	virtual void visit(set_read_write_msg const &msg) {
		echo_and_forward_("set_read_write\n", msg);
	}

	// Messages that refer to a single block.
	virtual void visit(read_lock_msg const &msg) {
		echo_block_and_forward_("read_lock ", msg);
	}

	virtual void visit(write_lock_msg const &msg) {
		echo_block_and_forward_("write_lock ", msg);
	}

	virtual void visit(zero_lock_msg const &msg) {
		echo_block_and_forward_("zero_lock ", msg);
	}

	virtual void visit(try_read_lock_msg const &msg) {
		echo_block_and_forward_("try_read_lock ", msg);
	}

	virtual void visit(unlock_msg const &msg) {
		echo_block_and_forward_("unlock ", msg);
	}

	virtual void visit(verify_msg const &msg) {
		echo_block_and_forward_("verify ", msg);
	}

	virtual void visit(prepare_msg const &msg) {
		echo_block_and_forward_("prepare ", msg);
	}

	virtual void visit(flush_and_unlock_msg const &msg) {
		echo_block_and_forward_("flush_and_unlock ", msg);
	}

	virtual void visit(prefetch_msg const &msg) {
		echo_block_and_forward_("prefetch ", msg);
	}

	// Filter deciding which block indexes get printed.  Currently every
	// block is accepted.
	bool interesting(block_address b) const {
		return true;
	}

	journal_visitor &inner_;

private:
	// Prints a fixed line, then forwards msg to the wrapped visitor.
	// Msg is the concrete message type, so the matching visit() overload
	// of inner_ is selected.
	template <typename Msg>
	void echo_and_forward_(char const *line, Msg const &msg) {
		cout << line;
		inner_.visit(msg);
	}

	// Prints "<label><index>\n" when the block is interesting, then
	// forwards msg to the wrapped visitor in either case.
	template <typename Msg>
	void echo_block_and_forward_(char const *label, Msg const &msg) {
		if (interesting(msg.index_))
			cout << label << msg.index_ << "\n";

		inner_.visit(msg);
	}
};
unsigned const MAX_HELD_LOCKS = 16;
@ -62,28 +153,44 @@ namespace {
// Need to track updates to the superblock to define transactions.
class checker : public journal_visitor {
public:
checker(block_address &nr_metadata_blocks)
: bm_(new block_manager<>("metadata.tmp", nr_metadata_blocks, MAX_HELD_LOCKS, block_manager<>::CREATE)) {
virtual void visit(open_journal_msg const &msg) {
bm_.reset(new block_manager<>("metadata.tmp", msg.nr_metadata_blocks_,
MAX_HELD_LOCKS, block_manager<>::CREATE));
}
virtual void visit(close_journal_msg const &msg) {
// noop
}
virtual void visit(read_lock_msg const &msg) {
read_lock_(msg.index_);
if (msg.success_)
read_lock_(msg.index_);
}
virtual void visit(write_lock_msg const &msg) {
write_lock_(msg.index_);
if (msg.success_)
write_lock_(msg.index_);
}
virtual void visit(zero_lock_msg const &msg) {
write_lock_(msg.index_);
if (msg.success_) {
write_lock_(msg.index_);
zero_(msg.index_);
}
}
virtual void visit(try_read_lock_msg const &msg) {
read_lock_(msg.index_);
if (msg.success_)
read_lock_(msg.index_);
}
virtual void visit(unlock_msg const &msg) {
	// The lock kind has to be sampled before unlock_() runs, because
	// unlock_() may erase the block's entry from locks_.
	bool const superblock_written =
		is_write_locked_(msg.index_) &&
		msg.index_ == superblock_detail::SUPERBLOCK_LOCATION;

	unlock_(msg.index_, msg.deltas_);

	// Releasing a write lock on the superblock is treated as a commit.
	if (superblock_written)
		commit_();
}
virtual void visit(verify_msg const &msg) {
@ -95,17 +202,17 @@ namespace {
}
virtual void visit(flush_msg const &msg) {
cerr << "spurious flush()\n";
cout << "WARN: spurious flush()\n";
}
virtual void visit(flush_and_unlock_msg const &msg) {
if (msg.index_ != superblock_detail::SUPERBLOCK_LOCATION) {
cerr << "flush_and_unlock received for block " << msg.index_
cout << "ERROR: flush_and_unlock received for block " << msg.index_
<< ", which isn't the superblock\n";
throw runtime_error("bad flush_and_unlock");
}
commit(msg.deltas_);
unlock_(msg.index_, msg.deltas_);
commit_();
}
virtual void visit(prefetch_msg const &msg) {
@ -122,55 +229,68 @@ namespace {
private:
void read_lock_(block_address b) {
if (write_locks_.count(b)) {
cerr << "read lock taken concurrently with write lock for block "
<< b << "\n";
throw runtime_error("bad read lock");
}
auto it = locks_.find(b);
if (it == locks_.end())
locks_.insert(make_pair(b, -1));
auto it = read_locks_.find(b);
if (it == read_locks_.end())
read_locks_.insert(make_pair(b, 1));
else
it->second++;
else if (it->second > 0) {
cout << "WARN: read lock taken concurrently with write lock for block "
<< b << "\n";
} else
--it->second;
}
void write_lock_(block_address b) {
if (active_.count(b)) {
cerr << "write lock taken for block "
if (is_superblock_(b)) {
if (locks_.size())
cout << "WARN: superblock taken when locks still held\n";
} else if (active_.count(b)) {
cout << "ERROR: write lock taken for block "
<< b
<< ", but it is still in the active transaction\n";
throw runtime_error("bad write lock");
throw runtime_error("bad write_lock");
}
if (write_locks_.count(b)) {
cerr << "write lock already held for block "
<< b
<< "\n";
throw runtime_error("bad write lock");
}
auto it = locks_.find(b);
if (it == locks_.end())
locks_.insert(make_pair(b, 1));
if (read_locks_.count(b)) {
cerr << "read lock requested for write locked block "
else if (it->second < 0) {
cout << "WARN: write lock requested for read locked block "
<< b << "\n";
throw runtime_error("bad write lock");
}
write_locks_.insert(b);
} else
it->second++;
}
bool is_write_locked_(block_address b) const {
	// locks_ keeps write locks as positive counts and read locks as
	// negative ones; a block with no entry is not locked at all.
	auto const entry = locks_.find(b);
	if (entry == locks_.end())
		return false;

	return entry->second > 0;
}
void unlock_(block_address b, delta_list const &deltas) {
if (write_locks_.count(b)) {
write_locks_.erase(b);
auto it = locks_.find(b);
if (it == locks_.end() || !it->second) {
cout << "ERROR: unlock requested on block " << b << ", which isn't locked\n";
throw runtime_error("bad unlock");
}
if (it->second < 0) {
it->second++;
if (deltas.size()) {
cout << "ERROR: unlocking a read lock for " << b << ", yet there are " << deltas.size() << " deltas\n";
throw runtime_error("bad unlock");
}
} else {
auto wr = bm_->write_lock(b);
for (auto &&d : deltas) {
uint8_t *data = static_cast<uint8_t *>(wr.data());
if (d.offset_ + d.bytes_.size() > 4096) {
cerr << "delta for block " << b << " is out of range ("
cout << "ERROR: delta for block " << b << " is out of range ("
<< d.offset_ << ", " << d.offset_ + d.bytes_.size() << "]\n";
throw runtime_error("bad unlock");
}
@ -178,46 +298,28 @@ namespace {
memcpy(data + d.offset_, d.bytes_.data(), d.bytes_.size());
}
} else {
auto it = read_locks_.find(b);
if (it == read_locks_.end()) {
cerr << "unlock requested on block " << b << ", which isn't locked\n";
throw runtime_error("bad unlock");
}
if (deltas.size()) {
cerr << "unlocking a read lock for " << b << ", yet there are " << deltas.size() << " deltas\n";
throw runtime_error("bad unlock");
}
// Decrement lock
if (!it->second) {
cerr << "read lock entry has zero count (internal error)\n";
throw runtime_error("bad unlock");
}
if (!--it->second)
read_locks_.erase(it);
it->second--;
}
if (!it->second)
locks_.erase(it);
}
void commit(delta_list const &deltas) {
// Requests a zeroing write lock on block b from the scratch block manager.
// The returned lock object is not used further and is destroyed when this
// function returns.
// NOTE(review): presumably this leaves block b zero-filled in the scratch
// metadata -- confirm against block_manager::write_lock_zero.
void zero_(block_address b) {
auto wr = bm_->write_lock_zero(b);
}
void commit_() {
using namespace thin_provisioning::superblock_detail;
// At this point the only lock held should be the superblock,
// and that should be a write lock.
if (read_locks_.size()) {
cerr << "committing when the following read locks are still held:\n";
for (auto &&p : read_locks_)
cerr << p.first << "\n";
}
unlock_(superblock_detail::SUPERBLOCK_LOCATION, deltas);
if (write_locks_.size()) {
cerr << "commit() called, but the following write locks are held:\n";
for (auto &&b : write_locks_)
cerr << b << "\n";
if (locks_.size() != 0) {
cout << "ERROR: committing when the following locks are still held:\n";
for (auto &&p : locks_)
if (p.first != SUPERBLOCK_LOCATION)
cerr << p.first << "\n";
throw runtime_error("bad commit");
}
build_active_set_();
@ -226,6 +328,7 @@ namespace {
void build_active_set_() {
using namespace thin_provisioning::superblock_detail;
cerr << "build active set\n";
superblock sb = read_superblock(bm_);
block_counter bc;
@ -242,20 +345,24 @@ namespace {
for (auto &&p : bc.get_counts()) {
if (!p.second) {
cerr << "weird zero count for block " << p.first << "\n";
throw runtime_error("build_active_set() failed");
cout << "weird zero count for block " << p.first << "\n";
}
active_.insert(p.first);
}
}
// True when b is the superblock's location (superblock_detail::SUPERBLOCK_LOCATION).
bool is_superblock_(block_address b) const {
return b == superblock_detail::SUPERBLOCK_LOCATION;
}
typedef set<block_address> block_set;
typedef map<block_address, unsigned> block_map;
// write locks positive, unlocked 0, read locks negative
typedef map<block_address, int> block_map;
block_set active_;
block_set write_locks_;
block_map read_locks_;
block_map locks_;
block_manager<>::ptr bm_;
transaction_manager::ptr tm_;
@ -269,13 +376,14 @@ namespace {
bool quiet;
};
void check(string const &path, block_address nr_metadata_blocks) {
void check(string const &path) {
block_address journal_size = get_file_length(path) / JOURNAL_BLOCK_SIZE;
block_manager<JOURNAL_BLOCK_SIZE>::ptr bm(
new block_manager<JOURNAL_BLOCK_SIZE>(path, journal_size, 4,
block_manager<JOURNAL_BLOCK_SIZE>::READ_ONLY));
journal j(bm);
checker c(nr_metadata_blocks);
checker c;
journal_display dc(c);
j.read_journal(c);
}
@ -291,7 +399,7 @@ thin_journal_cmd::thin_journal_cmd()
void
thin_journal_cmd::usage(std::ostream &out) const
{
out << "Usage: " << get_name() << " [options] {device|file} {nr blocks}" << endl
out << "Usage: " << get_name() << " [options] {device|file}" << endl
<< "Options:\n"
<< " {-q|--quiet}\n"
<< " {-h|--help}\n"
@ -332,7 +440,7 @@ thin_journal_cmd::run(int argc, char **argv)
}
}
if (argc - optind != 2) {
if (argc - optind != 1) {
if (!fs.quiet)
usage(cerr);
@ -340,7 +448,7 @@ thin_journal_cmd::run(int argc, char **argv)
}
try {
check(argv[optind], lexical_cast<block_address>(argv[optind + 1]));
check(argv[optind]);
} catch (std::exception &e) {
cerr << e.what() << "\n";