759407445f
- Fix error if no --block-sectors provided - Fix errors on pools without mappings, or zero-length files
350 lines
8.5 KiB
C++
350 lines
8.5 KiB
C++
// Copyright (C) 2015 Red Hat, Inc. All rights reserved.
|
|
//
|
|
// This file is part of the thin-provisioning-tools source.
|
|
//
|
|
// thin-provisioning-tools is free software: you can redistribute it
|
|
// and/or modify it under the terms of the GNU General Public License
|
|
// as published by the Free Software Foundation, either version 3 of
|
|
// the License, or (at your option) any later version.
|
|
//
|
|
// thin-provisioning-tools is distributed in the hope that it will be
|
|
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
|
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License along
|
|
// with thin-provisioning-tools. If not, see
|
|
// <http://www.gnu.org/licenses/>.
|
|
|
|
#include <iostream>
|
|
#include <getopt.h>
|
|
#include <libgen.h>
|
|
|
|
#include "version.h"
|
|
|
|
#include "base/application.h"
|
|
#include "base/error_state.h"
|
|
#include "base/progress_monitor.h"
|
|
#include "persistent-data/data-structures/btree_damage_visitor.h"
|
|
#include "persistent-data/file_utils.h"
|
|
#include "persistent-data/space-maps/core.h"
|
|
#include "persistent-data/space-maps/disk.h"
|
|
#include "thin-provisioning/cache_stream.h"
|
|
#include "thin-provisioning/fixed_chunk_stream.h"
|
|
#include "thin-provisioning/pool_stream.h"
|
|
#include "thin-provisioning/commands.h"
|
|
#include "thin-provisioning/device_tree.h"
|
|
#include "thin-provisioning/mapping_tree.h"
|
|
#include "thin-provisioning/metadata.h"
|
|
#include "thin-provisioning/rmap_visitor.h"
|
|
#include "thin-provisioning/superblock.h"
|
|
#include "thin-provisioning/variable_chunk_stream.h"
|
|
|
|
#include <boost/compute/detail/sha1.hpp>
|
|
#include <boost/lexical_cast.hpp>
|
|
#include <boost/optional.hpp>
|
|
#include <deque>
|
|
#include <vector>
|
|
|
|
using namespace base;
|
|
using namespace boost;
|
|
using namespace persistent_data;
|
|
using namespace std;
|
|
using namespace thin_provisioning;
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
namespace {
|
|
// Returns true when f is non-zero and divides n with no remainder.
// Zero is rejected up front, which also keeps the modulo below well defined.
bool factor_of(block_address f, block_address n) {
	if (!f)
		return false;

	return n % f == 0;
}
|
|
|
|
// Converts str to an unsigned 64-bit integer with boost::lexical_cast.
//
// str  - the text to convert.
// desc - human readable name of the value, used only in the error message.
//
// If the conversion throws, the problem is reported on stderr and the
// process exits with status 1; the function only returns on success.
uint64_t parse_int(string const &str, string const &desc) {
	try {
		return boost::lexical_cast<uint64_t>(str);

	} catch (...) {
		// Tell the user which value was rejected before bailing out;
		// exiting without a diagnostic gives no hint about what was wrong.
		std::cerr << "Couldn't parse " << desc << ": '" << str << "'\n";
		exit(1);
	}

	return 0; // never get here
}
|
|
|
|
//--------------------------------
|
|
|
|
struct flags {
|
|
flags()
|
|
: cache_mem(64 * 1024 * 1024),
|
|
content_based_chunks(false) {
|
|
}
|
|
|
|
string data_dev;
|
|
optional<string> metadata_dev;
|
|
optional<unsigned> block_size;
|
|
unsigned cache_mem;
|
|
bool content_based_chunks;
|
|
};
|
|
|
|
using namespace mapping_tree_detail;
|
|
|
|
class duplicate_counter {
|
|
public:
|
|
duplicate_counter()
|
|
: non_zero_dups_(0),
|
|
zero_dups_(0) {
|
|
}
|
|
|
|
void add_duplicate(block_address len) {
|
|
non_zero_dups_ += len;
|
|
}
|
|
|
|
void add_zero_duplicate(block_address len) {
|
|
zero_dups_ += len;
|
|
}
|
|
|
|
block_address get_total() const {
|
|
return non_zero_dups_ + zero_dups_;
|
|
}
|
|
|
|
block_address get_non_zeroes() const {
|
|
return non_zero_dups_;
|
|
}
|
|
|
|
block_address get_zeroes() const {
|
|
return zero_dups_;
|
|
}
|
|
|
|
void display_results(chunk_stream const &stream) const {
|
|
block_address meg = 1024 * 1024;
|
|
cout << "\n\n"
|
|
<< stream.size() / meg << "m examined, "
|
|
<< get_non_zeroes() / meg << "m duplicates, "
|
|
<< get_zeroes() / meg << "m zeroes\n";
|
|
}
|
|
|
|
private:
|
|
block_address non_zero_dups_;
|
|
block_address zero_dups_;
|
|
};
|
|
|
|
class duplicate_detector {
|
|
public:
|
|
void scan_with_variable_sized_chunks(chunk_stream &stream) {
|
|
if (!stream.size())
|
|
return;
|
|
variable_chunk_stream vstream(stream, 4096);
|
|
scan(vstream);
|
|
}
|
|
|
|
void scan_with_fixed_sized_chunks(chunk_stream &stream, block_address chunk_size) {
|
|
if (!stream.size())
|
|
return;
|
|
fixed_chunk_stream fstream(stream, chunk_size);
|
|
scan(fstream);
|
|
}
|
|
|
|
duplicate_counter const &get_results() const {
|
|
return results_;
|
|
}
|
|
|
|
private:
|
|
void scan(chunk_stream &stream) {
|
|
block_address total_seen(0);
|
|
unique_ptr<progress_monitor> pbar = create_progress_bar("Examining data");
|
|
|
|
do {
|
|
// FIXME: use a wrapper class to automate the put()
|
|
chunk const &c = stream.get();
|
|
examine(c);
|
|
stream.put(c);
|
|
|
|
total_seen += c.len_;
|
|
pbar->update_percent((total_seen * 100) / stream.size());
|
|
|
|
} while (stream.next());
|
|
|
|
pbar->update_percent(100);
|
|
results_.display_results(stream);
|
|
}
|
|
|
|
void examine(chunk const &c) {
|
|
if (all_zeroes(c))
|
|
results_.add_zero_duplicate(c.len_);
|
|
|
|
else {
|
|
digestor_.reset();
|
|
digestor_.process_bytes(c.mem_.begin, c.mem_.end - c.mem_.begin);
|
|
|
|
unsigned int digest[5];
|
|
digestor_.get_digest(digest);
|
|
|
|
// hack
|
|
vector<unsigned int> v(5);
|
|
for (unsigned i = 0; i < 5; i++)
|
|
v[i] = digest[i];
|
|
|
|
fingerprint_map::const_iterator it = fm_.find(v);
|
|
if (it != fm_.end()) {
|
|
results_.add_duplicate(c.len_);
|
|
} else
|
|
fm_.insert(make_pair(v, c.offset_));
|
|
}
|
|
}
|
|
|
|
bool all_zeroes(chunk const &c) const {
|
|
for (uint8_t *ptr = c.mem_.begin; ptr != c.mem_.end; ptr++) {
|
|
if (*ptr != 0)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
typedef map<vector<unsigned int>, block_address> fingerprint_map;
|
|
|
|
unsigned block_size_;
|
|
boost::uuids::detail::sha1 digestor_;
|
|
fingerprint_map fm_;
|
|
duplicate_counter results_;
|
|
};
|
|
|
|
// Scans the data device of a thin pool for duplicated chunks, using the
// pool metadata named by fs.metadata_dev (which the caller has checked is
// set).
//
// With --content-based-chunks the variable sized scan is used.  Otherwise
// chunks are fixed size: the pool's own block size by default, or
// fs.block_size if supplied, which must be a factor of the pool block size.
//
// Returns 0.  Throws runtime_error if fs.block_size is given but is not a
// factor of the pool block size.
int show_dups_pool(flags const &fs) {
	block_manager::ptr bm = open_bm(*fs.metadata_dev);
	transaction_manager::ptr tm =
		open_tm(bm, superblock_detail::SUPERBLOCK_LOCATION);
	superblock_detail::superblock sb = read_superblock(bm);

	// The superblock value is scaled by 512 (presumably sectors -> bytes).
	block_address const pool_block_size = sb.data_block_size_ * 512;
	block_address const nr_blocks = get_nr_blocks(fs.data_dev, pool_block_size);

	// The streams are always built on the pool's block size, even when a
	// smaller chunk size is requested for the scan itself.
	cache_stream stream(fs.data_dev, pool_block_size, fs.cache_mem);
	pool_stream pstream(stream, tm, sb, nr_blocks);

	duplicate_detector detector;

	if (fs.content_based_chunks) {
		detector.scan_with_variable_sized_chunks(pstream);
		return 0;
	}

	block_address chunk_size = pool_block_size;
	if (fs.block_size) {
		if (!factor_of(*fs.block_size, pool_block_size))
			throw runtime_error("specified block size is not a factor of the pool chunk size\n");

		chunk_size = *fs.block_size;
	}

	detector.scan_with_fixed_sized_chunks(pstream, chunk_size);
	return 0;
}
|
|
|
|
// Scans fs.data_dev as a plain linear device (no pool metadata) for
// duplicated chunks.
//
// fs.block_size must be set and non-zero: it sizes the cache_stream and,
// unless --content-based-chunks was given, the fixed size chunks used for
// the scan.  The path, block count and block size are echoed to stderr.
//
// Returns 0.  Throws runtime_error if the block size is missing or zero.
int show_dups_linear(flags const &fs) {
	if (!fs.block_size)
		// FIXME: this check should be moved to the switch parsing
		throw runtime_error("--block-sectors or --metadata-dev must be supplied");

	block_address block_size = *fs.block_size;

	// A block size of zero can reach here via '--block-sectors 0', or via a
	// sector count whose byte size wraps to zero when stored in the unsigned
	// flag.  It cannot describe a device, and the helpers below are handed
	// it as a divisor-like quantity (NOTE(review): get_nr_blocks presumably
	// divides by it -- confirm), so reject it explicitly.
	if (!block_size)
		throw runtime_error("--block-sectors must be greater than zero");

	block_address nr_blocks = get_nr_blocks(fs.data_dev, block_size);

	cerr << "path = " << fs.data_dev << "\n";
	cerr << "nr_blocks = " << nr_blocks << "\n";
	cerr << "block size = " << block_size << "\n";

	cache_stream stream(fs.data_dev, block_size, fs.cache_mem);
	duplicate_detector dd;

	if (fs.content_based_chunks)
		dd.scan_with_variable_sized_chunks(stream);
	else
		dd.scan_with_fixed_sized_chunks(stream, block_size);

	return 0;
}
|
|
|
|
// Dispatches to the pool scan when a metadata device was supplied, and to
// the linear scan (after a notice on stderr) when it was not.
int show_dups(flags const &fs) {
	if (!fs.metadata_dev) {
		cerr << "No metadata device provided, so treating data device as a linear device\n";
		return show_dups_linear(fs);
	}

	return show_dups_pool(fs);
}
|
|
}
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
// Initialises `command` (presumably the base class) with this tool's name.
thin_show_duplicates_cmd::thin_show_duplicates_cmd()
	: command("thin_show_duplicates") {
}
|
|
|
|
void
|
|
thin_show_duplicates_cmd::usage(std::ostream &out) const
|
|
{
|
|
out << "Usage: " << get_name() << " [options] {device|file}\n"
|
|
<< "Options:\n"
|
|
<< " {--block-sectors} <integer>\n"
|
|
<< " {--content-based-chunks}\n"
|
|
<< " {--metadata-dev} <path>\n"
|
|
<< " {-h|--help}\n"
|
|
<< " {-V|--version}" << endl;
|
|
}
|
|
|
|
int
|
|
thin_show_duplicates_cmd::run(int argc, char **argv)
|
|
{
|
|
int c;
|
|
flags fs;
|
|
|
|
char const shortopts[] = "qhV";
|
|
option const longopts[] = {
|
|
{ "block-sectors", required_argument, NULL, 1},
|
|
{ "content-based-chunks", no_argument, NULL, 2},
|
|
{ "metadata-dev", required_argument, NULL, 3},
|
|
{ "help", no_argument, NULL, 'h'},
|
|
{ "version", no_argument, NULL, 'V'},
|
|
{ NULL, no_argument, NULL, 0 }
|
|
};
|
|
|
|
while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
|
|
switch(c) {
|
|
case 'h':
|
|
usage(cout);
|
|
return 0;
|
|
|
|
case 'V':
|
|
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
|
|
return 0;
|
|
|
|
case 1:
|
|
fs.block_size = 512 * parse_int(optarg, "block sectors");
|
|
break;
|
|
|
|
case 2:
|
|
fs.content_based_chunks = true;
|
|
break;
|
|
|
|
case 3:
|
|
fs.metadata_dev = optarg;
|
|
break;
|
|
|
|
default:
|
|
usage(cerr);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (argc == optind) {
|
|
cerr << "No data device/file provided." << endl;
|
|
usage(cerr);
|
|
exit(1);
|
|
}
|
|
|
|
fs.data_dev = argv[optind];
|
|
|
|
return show_dups(fs);
|
|
}
|
|
|
|
//----------------------------------------------------------------
|