[thin_repair, thin_dump] When repairing, we now hunt for the best btree roots.

We've had a trickle of users who accidentally activate the same pool on a VM and on the host at the same time. Typically the host doesn't do any IO, but the kernel will still rewrite the superblock on shutdown. This leaves the superblock pointing to very out-of-date btree roots, and so we get massive metadata loss. This patch changes thin_repair and thin_dump --repair: they now hunt for the most recent undamaged and consistent roots of the device and mapping trees, and use those as the starting point of the repair.
2019-04-17 12:17:13 +01:00
parent b027a1039f
commit 9e20465fd1
4 changed files with 560 additions and 362 deletions
--- a/thin-provisioning/thin_dump.cc
+++ b/thin-provisioning/thin_dump.cc
@@ -40,12 +40,14 @@ namespace {
 	struct flags {
 		flags()
 			: format("xml"),
+			  repair(false),
 			  use_metadata_snap(false) {
 		}

 		dump_options opts;

 		string format;
+		bool repair;
 		bool use_metadata_snap;
 		optional<block_address> snap_location;
 	};
@@ -84,9 +86,15 @@ namespace {

 	int dump_(string const &path, ostream &out, struct flags &flags) {
 		try {
-			metadata::ptr md = open_metadata(path, flags);
 			emitter::ptr e = create_emitter(flags.format, out);
-			metadata_dump(md, e, flags.opts);
+
+			if (flags.repair) {
+				auto bm = open_bm(path, block_manager<>::READ_ONLY, true);
+				metadata_repair(bm, e);
+			} else {
+				metadata::ptr md = open_metadata(path, flags);
+				metadata_dump(md, e, flags.opts);
+			}

 		} catch (std::exception &e) {
 			cerr << e.what() << endl;
@@ -161,7 +169,7 @@ thin_dump_cmd::run(int argc, char **argv)
 			break;

 		case 'r':
-			flags.opts.repair_ = true;
+			flags.repair = true;
 			break;

 		case 'm':