[thin_journal_check] Checks journal of block manager activity.

You need to apply doc/bm-journal.patch to create the journal.

thin_journal_check confirms that if the machine had crashed at any time
during the test run, no metadata corruption would have occurred.
This commit is contained in:
Joe Thornber 2018-09-24 14:51:46 +01:00
parent 70cdfe12a2
commit de7c9a5781
6 changed files with 1761 additions and 94 deletions

1
bin/thin_journal_check Symbolic link
View File

@ -0,0 +1 @@
pdata_tools

1505
doc/bm-journal.patch Normal file

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,7 @@ thin_provisioning::register_thin_commands(base::application &app)
app.add_cmd(command::ptr(new thin_generate_metadata_cmd())); app.add_cmd(command::ptr(new thin_generate_metadata_cmd()));
app.add_cmd(command::ptr(new thin_show_duplicates_cmd())); app.add_cmd(command::ptr(new thin_show_duplicates_cmd()));
app.add_cmd(command::ptr(new thin_show_metadata_cmd())); app.add_cmd(command::ptr(new thin_show_metadata_cmd()));
app.add_cmd(command::ptr(new thin_journal_cmd()));
#endif #endif
} }

View File

@ -38,17 +38,22 @@ byte_stream::read_bytes(uint8_t *b, uint8_t *e)
{ {
while (b != e) while (b != e)
b += read_some_(b, e); b += read_some_(b, e);
assert(b == e);
} }
void void
byte_stream::next_block_() byte_stream::next_block_()
{ {
current_block_++; current_block_++;
cursor_ = 0;
} }
size_t size_t
byte_stream::read_some_(uint8_t *b, uint8_t *e) byte_stream::read_some_(uint8_t *b, uint8_t *e)
{ {
assert(cursor_ <= JOURNAL_BLOCK_SIZE);
if (cursor_ == JOURNAL_BLOCK_SIZE) if (cursor_ == JOURNAL_BLOCK_SIZE)
next_block_(); next_block_();
@ -69,6 +74,29 @@ journal_msg::journal_msg(bool success)
{ {
} }
// Constructs an open-journal message.  The journal_msg base is always
// initialised with success == true, and the caller-supplied number of
// metadata blocks is stored unchanged in nr_metadata_blocks_.
open_journal_msg::open_journal_msg(uint64_t nr_metadata_blocks)
: journal_msg(true),
nr_metadata_blocks_(nr_metadata_blocks)
{
}
// Visitor dispatch: passes this message to the visitor, selecting its
// visit(open_journal_msg const &) overload through the static type of *this.
void
open_journal_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
// Constructs a close-journal message.  It carries no payload; the
// journal_msg base is always initialised with success == true.
close_journal_msg::close_journal_msg()
: journal_msg(true)
{
}
// Visitor dispatch: passes this message to the visitor, selecting its
// visit(close_journal_msg const &) overload through the static type of *this.
void
close_journal_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
block_msg::block_msg(bool success, uint64_t index) block_msg::block_msg(bool success, uint64_t index)
: journal_msg(success), index_(index) : journal_msg(success), index_(index)
{ {
@ -228,9 +256,18 @@ journal::read_one_(struct journal_visitor &v)
uint8_t header = read_<uint8_t>(); uint8_t header = read_<uint8_t>();
uint8_t t = header >> 1; uint8_t t = header >> 1;
uint8_t success = header & 0x1; uint8_t success = header & 0x1;
uint64_t index; uint64_t index, nr_blocks;
switch (static_cast<msg_type>(t)) { switch (static_cast<msg_type>(t)) {
case MT_OPEN_JOURNAL:
nr_blocks = read_<uint64_t>();
v.visit(open_journal_msg(nr_blocks));
break;
case MT_CLOSE_JOURNAL:
v.visit(close_journal_msg());
return false;
case MT_READ_LOCK: case MT_READ_LOCK:
index = read_<uint64_t>(); index = read_<uint64_t>();
v.visit(read_lock_msg(success, index)); v.visit(read_lock_msg(success, index));
@ -273,6 +310,7 @@ journal::read_one_(struct journal_visitor &v)
break; break;
case MT_FLUSH_AND_UNLOCK: { case MT_FLUSH_AND_UNLOCK: {
cerr << "reading flush_and_unlock msg\n";
index = read_<uint64_t>(); index = read_<uint64_t>();
auto deltas = read_deltas_(); auto deltas = read_deltas_();
v.visit(flush_and_unlock_msg(success, index, deltas)); v.visit(flush_and_unlock_msg(success, index, deltas));
@ -291,9 +329,6 @@ journal::read_one_(struct journal_visitor &v)
case MT_SET_READ_WRITE: case MT_SET_READ_WRITE:
v.visit(set_read_write_msg()); v.visit(set_read_write_msg());
break; break;
case MT_END_OF_JOURNAL:
return false;
} }
return true; return true;
@ -302,14 +337,16 @@ journal::read_one_(struct journal_visitor &v)
bool bool
journal::read_delta_(delta_list &ds) journal::read_delta_(delta_list &ds)
{ {
uint8_t chunk = read_<uint8_t>(); uint16_t chunk = read_<uint16_t>();
if (chunk == 0xff) if (chunk == 0xffff)
return false; return false;
assert(chunk < JOURNAL_NR_CHUNKS);
auto bytes = vector<uint8_t>(JOURNAL_CHUNK_SIZE, 0); auto bytes = vector<uint8_t>(JOURNAL_CHUNK_SIZE, 0);
in_.read_bytes(bytes.data(), bytes.data() + JOURNAL_CHUNK_SIZE); in_.read_bytes(bytes.data(), bytes.data() + JOURNAL_CHUNK_SIZE);
ds.push_back(delta(chunk, bytes)); ds.push_back(delta(chunk * JOURNAL_CHUNK_SIZE, bytes));
return true; return true;
} }

View File

@ -27,8 +27,8 @@
namespace thin_provisioning { namespace thin_provisioning {
uint32_t const JOURNAL_BLOCK_SIZE = 256 * 1024; uint32_t const JOURNAL_BLOCK_SIZE = 256 * 1024;
uint32_t const JOURNAL_NR_CHUNKS = 32; uint32_t const JOURNAL_CHUNK_SIZE = 32;
uint32_t const JOURNAL_CHUNK_SIZE = 4096 / JOURNAL_NR_CHUNKS; uint32_t const JOURNAL_NR_CHUNKS = (4096 / JOURNAL_CHUNK_SIZE);
class byte_stream { class byte_stream {
public: public:
@ -59,6 +59,17 @@ namespace thin_provisioning {
bool success_; bool success_;
}; };
// Journal message recording that a journal was opened.
struct open_journal_msg : public journal_msg {
// nr_metadata_blocks: value to store in nr_metadata_blocks_.
open_journal_msg(uint64_t nr_metadata_blocks);
// Hands this message to the given visitor.
virtual void visit(journal_visitor &v) const;
// Number of metadata blocks carried by the message.
uint64_t nr_metadata_blocks_;
};
// Journal message recording that a journal was closed.  It has no fields
// of its own beyond what journal_msg provides.
struct close_journal_msg : public journal_msg {
close_journal_msg();
// Hands this message to the given visitor.
virtual void visit(journal_visitor &v) const;
};
struct block_msg : public journal_msg { struct block_msg : public journal_msg {
block_msg(bool success, uint64_t index); block_msg(bool success, uint64_t index);
uint64_t index_; uint64_t index_;
@ -148,6 +159,8 @@ namespace thin_provisioning {
msg.visit(*this); msg.visit(*this);
} }
virtual void visit(open_journal_msg const &msg) = 0;
virtual void visit(close_journal_msg const &msg) = 0;
virtual void visit(read_lock_msg const &msg) = 0; virtual void visit(read_lock_msg const &msg) = 0;
virtual void visit(write_lock_msg const &msg) = 0; virtual void visit(write_lock_msg const &msg) = 0;
virtual void visit(zero_lock_msg const &msg) = 0; virtual void visit(zero_lock_msg const &msg) = 0;
@ -163,7 +176,10 @@ namespace thin_provisioning {
}; };
enum msg_type { enum msg_type {
MT_READ_LOCK = 0, MT_OPEN_JOURNAL,
MT_CLOSE_JOURNAL,
MT_READ_LOCK,
MT_WRITE_LOCK, MT_WRITE_LOCK,
MT_ZERO_LOCK, MT_ZERO_LOCK,
MT_TRY_READ_LOCK, MT_TRY_READ_LOCK,
@ -175,7 +191,6 @@ namespace thin_provisioning {
MT_PREFETCH, MT_PREFETCH,
MT_SET_READ_ONLY, MT_SET_READ_ONLY,
MT_SET_READ_WRITE, MT_SET_READ_WRITE,
MT_END_OF_JOURNAL,
}; };
class journal { class journal {

View File

@ -54,6 +54,97 @@ using namespace thin_provisioning;
//---------------------------------------------------------------- //----------------------------------------------------------------
namespace { namespace {
class journal_display : public journal_visitor {
public:
journal_display(journal_visitor &inner)
: inner_(inner) {
}
virtual void visit(open_journal_msg const &msg) {
cout << "open_journal\n";
inner_.visit(msg);
}
virtual void visit(close_journal_msg const &msg) {
cout << "close_journal\n";
inner_.visit(msg);
}
virtual void visit(read_lock_msg const &msg) {
if (interesting(msg.index_))
cout << "read_lock " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(write_lock_msg const &msg) {
if (interesting(msg.index_))
cout << "write_lock " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(zero_lock_msg const &msg) {
if (interesting(msg.index_))
cout << "zero_lock " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(try_read_lock_msg const &msg) {
if (interesting(msg.index_))
cout << "try_read_lock " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(unlock_msg const &msg) {
if (interesting(msg.index_))
cout << "unlock " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(verify_msg const &msg) {
if (interesting(msg.index_))
cout << "verify " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(prepare_msg const &msg) {
if (interesting(msg.index_))
cout << "prepare " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(flush_msg const &msg) {
cout << "flush\n";
inner_.visit(msg);
}
virtual void visit(flush_and_unlock_msg const &msg) {
if (interesting(msg.index_))
cout << "flush_and_unlock " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(prefetch_msg const &msg) {
if (interesting(msg.index_))
cout << "prefetch " << msg.index_ << "\n";
inner_.visit(msg);
}
virtual void visit(set_read_only_msg const &msg) {
cout << "set_read_only\n";
inner_.visit(msg);
}
virtual void visit(set_read_write_msg const &msg) {
cout << "set_read_write\n";
inner_.visit(msg);
}
bool interesting(block_address b) const {
return true;
}
journal_visitor &inner_;
};
unsigned const MAX_HELD_LOCKS = 16; unsigned const MAX_HELD_LOCKS = 16;
@ -62,28 +153,44 @@ namespace {
// Need to track updates to the superblock to define transactions. // Need to track updates to the superblock to define transactions.
class checker : public journal_visitor { class checker : public journal_visitor {
public: public:
checker(block_address &nr_metadata_blocks) virtual void visit(open_journal_msg const &msg) {
: bm_(new block_manager<>("metadata.tmp", nr_metadata_blocks, MAX_HELD_LOCKS, block_manager<>::CREATE)) { bm_.reset(new block_manager<>("metadata.tmp", msg.nr_metadata_blocks_,
MAX_HELD_LOCKS, block_manager<>::CREATE));
}
virtual void visit(close_journal_msg const &msg) {
// noop
} }
virtual void visit(read_lock_msg const &msg) { virtual void visit(read_lock_msg const &msg) {
read_lock_(msg.index_); if (msg.success_)
read_lock_(msg.index_);
} }
virtual void visit(write_lock_msg const &msg) { virtual void visit(write_lock_msg const &msg) {
write_lock_(msg.index_); if (msg.success_)
write_lock_(msg.index_);
} }
virtual void visit(zero_lock_msg const &msg) { virtual void visit(zero_lock_msg const &msg) {
write_lock_(msg.index_); if (msg.success_) {
write_lock_(msg.index_);
zero_(msg.index_);
}
} }
virtual void visit(try_read_lock_msg const &msg) { virtual void visit(try_read_lock_msg const &msg) {
read_lock_(msg.index_); if (msg.success_)
read_lock_(msg.index_);
} }
virtual void visit(unlock_msg const &msg) { virtual void visit(unlock_msg const &msg) {
bool write_locked = is_write_locked_(msg.index_);
unlock_(msg.index_, msg.deltas_); unlock_(msg.index_, msg.deltas_);
if (write_locked && msg.index_ == superblock_detail::SUPERBLOCK_LOCATION)
commit_();
} }
virtual void visit(verify_msg const &msg) { virtual void visit(verify_msg const &msg) {
@ -95,17 +202,17 @@ namespace {
} }
virtual void visit(flush_msg const &msg) { virtual void visit(flush_msg const &msg) {
cerr << "spurious flush()\n"; cout << "WARN: spurious flush()\n";
} }
virtual void visit(flush_and_unlock_msg const &msg) { virtual void visit(flush_and_unlock_msg const &msg) {
if (msg.index_ != superblock_detail::SUPERBLOCK_LOCATION) { if (msg.index_ != superblock_detail::SUPERBLOCK_LOCATION) {
cerr << "flush_and_unlock received for block " << msg.index_ cout << "ERROR: flush_and_unlock received for block " << msg.index_
<< ", which isn't the superblock\n"; << ", which isn't the superblock\n";
throw runtime_error("bad flush_and_unlock");
} }
commit(msg.deltas_); unlock_(msg.index_, msg.deltas_);
commit_();
} }
virtual void visit(prefetch_msg const &msg) { virtual void visit(prefetch_msg const &msg) {
@ -122,55 +229,68 @@ namespace {
private: private:
void read_lock_(block_address b) { void read_lock_(block_address b) {
if (write_locks_.count(b)) { auto it = locks_.find(b);
cerr << "read lock taken concurrently with write lock for block " if (it == locks_.end())
<< b << "\n"; locks_.insert(make_pair(b, -1));
throw runtime_error("bad read lock");
}
auto it = read_locks_.find(b); else if (it->second > 0) {
if (it == read_locks_.end()) cout << "WARN: read lock taken concurrently with write lock for block "
read_locks_.insert(make_pair(b, 1)); << b << "\n";
else
it->second++; } else
--it->second;
} }
void write_lock_(block_address b) { void write_lock_(block_address b) {
if (active_.count(b)) { if (is_superblock_(b)) {
cerr << "write lock taken for block " if (locks_.size())
cout << "WARN: superblock taken when locks still held\n";
} else if (active_.count(b)) {
cout << "ERROR: write lock taken for block "
<< b << b
<< ", but it is still in the active transaction\n"; << ", but it is still in the active transaction\n";
throw runtime_error("bad write lock"); throw runtime_error("bad write_lock");
} }
if (write_locks_.count(b)) { auto it = locks_.find(b);
cerr << "write lock already held for block " if (it == locks_.end())
<< b locks_.insert(make_pair(b, 1));
<< "\n";
throw runtime_error("bad write lock");
}
if (read_locks_.count(b)) { else if (it->second < 0) {
cerr << "read lock requested for write locked block " cout << "WARN: write lock requested for read locked block "
<< b << "\n"; << b << "\n";
throw runtime_error("bad write lock"); } else
} it->second++;
write_locks_.insert(b);
} }
bool is_write_locked_(block_address b) const {
auto it = locks_.find(b);
return it != locks_.end() && it->second > 0;
}
void unlock_(block_address b, delta_list const &deltas) { void unlock_(block_address b, delta_list const &deltas) {
if (write_locks_.count(b)) { auto it = locks_.find(b);
write_locks_.erase(b); if (it == locks_.end() || !it->second) {
cout << "ERROR: unlock requested on block " << b << ", which isn't locked\n";
throw runtime_error("bad unlock");
}
if (it->second < 0) {
it->second++;
if (deltas.size()) {
cout << "ERROR: unlocking a read lock for " << b << ", yet there are " << deltas.size() << " deltas\n";
throw runtime_error("bad unlock");
}
} else {
auto wr = bm_->write_lock(b); auto wr = bm_->write_lock(b);
for (auto &&d : deltas) { for (auto &&d : deltas) {
uint8_t *data = static_cast<uint8_t *>(wr.data()); uint8_t *data = static_cast<uint8_t *>(wr.data());
if (d.offset_ + d.bytes_.size() > 4096) { if (d.offset_ + d.bytes_.size() > 4096) {
cerr << "delta for block " << b << " is out of range (" cout << "ERROR: delta for block " << b << " is out of range ("
<< d.offset_ << ", " << d.offset_ + d.bytes_.size() << "]\n"; << d.offset_ << ", " << d.offset_ + d.bytes_.size() << "]\n";
throw runtime_error("bad unlock"); throw runtime_error("bad unlock");
} }
@ -178,46 +298,28 @@ namespace {
memcpy(data + d.offset_, d.bytes_.data(), d.bytes_.size()); memcpy(data + d.offset_, d.bytes_.data(), d.bytes_.size());
} }
} else { it->second--;
auto it = read_locks_.find(b);
if (it == read_locks_.end()) {
cerr << "unlock requested on block " << b << ", which isn't locked\n";
throw runtime_error("bad unlock");
}
if (deltas.size()) {
cerr << "unlocking a read lock for " << b << ", yet there are " << deltas.size() << " deltas\n";
throw runtime_error("bad unlock");
}
// Decrement lock
if (!it->second) {
cerr << "read lock entry has zero count (internal error)\n";
throw runtime_error("bad unlock");
}
if (!--it->second)
read_locks_.erase(it);
} }
if (!it->second)
locks_.erase(it);
} }
void commit(delta_list const &deltas) { void zero_(block_address b) {
auto wr = bm_->write_lock_zero(b);
}
void commit_() {
using namespace thin_provisioning::superblock_detail;
// At this point the only lock held should be the superblock, // At this point the only lock held should be the superblock,
// and that should be a write lock. // and that should be a write lock.
if (read_locks_.size()) { if (locks_.size() != 0) {
cerr << "committing when the following read locks are still held:\n"; cout << "ERROR: committing when the following locks are still held:\n";
for (auto &&p : read_locks_) for (auto &&p : locks_)
cerr << p.first << "\n"; if (p.first != SUPERBLOCK_LOCATION)
} cerr << p.first << "\n";
throw runtime_error("bad commit");
unlock_(superblock_detail::SUPERBLOCK_LOCATION, deltas);
if (write_locks_.size()) {
cerr << "commit() called, but the following write locks are held:\n";
for (auto &&b : write_locks_)
cerr << b << "\n";
} }
build_active_set_(); build_active_set_();
@ -226,6 +328,7 @@ namespace {
void build_active_set_() { void build_active_set_() {
using namespace thin_provisioning::superblock_detail; using namespace thin_provisioning::superblock_detail;
cerr << "build active set\n";
superblock sb = read_superblock(bm_); superblock sb = read_superblock(bm_);
block_counter bc; block_counter bc;
@ -242,20 +345,24 @@ namespace {
for (auto &&p : bc.get_counts()) { for (auto &&p : bc.get_counts()) {
if (!p.second) { if (!p.second) {
cerr << "weird zero count for block " << p.first << "\n"; cout << "weird zero count for block " << p.first << "\n";
throw runtime_error("build_active_set() failed");
} }
active_.insert(p.first); active_.insert(p.first);
} }
} }
bool is_superblock_(block_address b) const {
return b == superblock_detail::SUPERBLOCK_LOCATION;
}
typedef set<block_address> block_set; typedef set<block_address> block_set;
typedef map<block_address, unsigned> block_map;
// write locks positive, unlocked 0, read locks negative
typedef map<block_address, int> block_map;
block_set active_; block_set active_;
block_set write_locks_; block_map locks_;
block_map read_locks_;
block_manager<>::ptr bm_; block_manager<>::ptr bm_;
transaction_manager::ptr tm_; transaction_manager::ptr tm_;
@ -269,13 +376,14 @@ namespace {
bool quiet; bool quiet;
}; };
void check(string const &path, block_address nr_metadata_blocks) { void check(string const &path) {
block_address journal_size = get_file_length(path) / JOURNAL_BLOCK_SIZE; block_address journal_size = get_file_length(path) / JOURNAL_BLOCK_SIZE;
block_manager<JOURNAL_BLOCK_SIZE>::ptr bm( block_manager<JOURNAL_BLOCK_SIZE>::ptr bm(
new block_manager<JOURNAL_BLOCK_SIZE>(path, journal_size, 4, new block_manager<JOURNAL_BLOCK_SIZE>(path, journal_size, 4,
block_manager<JOURNAL_BLOCK_SIZE>::READ_ONLY)); block_manager<JOURNAL_BLOCK_SIZE>::READ_ONLY));
journal j(bm); journal j(bm);
checker c(nr_metadata_blocks); checker c;
journal_display dc(c);
j.read_journal(c); j.read_journal(c);
} }
@ -291,7 +399,7 @@ thin_journal_cmd::thin_journal_cmd()
void void
thin_journal_cmd::usage(std::ostream &out) const thin_journal_cmd::usage(std::ostream &out) const
{ {
out << "Usage: " << get_name() << " [options] {device|file} {nr blocks}" << endl out << "Usage: " << get_name() << " [options] {device|file}" << endl
<< "Options:\n" << "Options:\n"
<< " {-q|--quiet}\n" << " {-q|--quiet}\n"
<< " {-h|--help}\n" << " {-h|--help}\n"
@ -332,7 +440,7 @@ thin_journal_cmd::run(int argc, char **argv)
} }
} }
if (argc - optind != 2) { if (argc - optind != 1) {
if (!fs.quiet) if (!fs.quiet)
usage(cerr); usage(cerr);
@ -340,7 +448,7 @@ thin_journal_cmd::run(int argc, char **argv)
} }
try { try {
check(argv[optind], lexical_cast<block_address>(argv[optind + 1])); check(argv[optind]);
} catch (std::exception &e) { } catch (std::exception &e) {
cerr << e.what() << "\n"; cerr << e.what() << "\n";