// Copyright (C) 2015 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools.  If not, see
// <http://www.gnu.org/licenses/>.

#include <iostream>
#include <getopt.h>
#include <libgen.h>

#include "version.h"

#include "base/application.h"
#include "base/error_state.h"
#include "base/progress_monitor.h"
#include "persistent-data/data-structures/btree_damage_visitor.h"
#include "persistent-data/file_utils.h"
#include "persistent-data/space-maps/core.h"
#include "persistent-data/space-maps/disk.h"
#include "thin-provisioning/cache_stream.h"
#include "thin-provisioning/fixed_chunk_stream.h"
#include "thin-provisioning/pool_stream.h"
#include "thin-provisioning/commands.h"
#include "thin-provisioning/device_tree.h"
#include "thin-provisioning/mapping_tree.h"
#include "thin-provisioning/rmap_visitor.h"
#include "thin-provisioning/superblock.h"
#include "thin-provisioning/variable_chunk_stream.h"

#include <boost/uuid/sha1.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/optional.hpp>
#include <deque>
#include <vector>

using namespace base;
using namespace boost;
using namespace persistent_data;
using namespace std;
using namespace thin_provisioning;

//----------------------------------------------------------------

namespace {
	// Returns true when |f| divides |n| with no remainder.
	// |f| must be non-zero; the modulo below is undefined otherwise.
	bool factor_of(block_address f, block_address n) {
		block_address const remainder = n % f;
		return remainder == 0;
	}

	// Opens the file or device at |path| through a read-only block
	// manager covering the number of blocks reported by get_nr_blocks().
	block_manager<>::ptr
	open_bm(string const &path) {
		typedef block_manager<> bm_type;

		block_address const nr_blocks = get_nr_blocks(path);
		return bm_type::ptr(new bm_type(path, nr_blocks, 1, bm_type::READ_ONLY));
	}

	// Builds a transaction manager on top of |bm|.
	//
	// The space map is a fresh in-core map with one entry per block of
	// |bm|; the superblock location is the only block given a reference.
	transaction_manager::ptr
	open_tm(block_manager<>::ptr bm) {
		space_map::ptr core_sm(new core_map(bm->get_nr_blocks()));
		core_sm->inc(superblock_detail::SUPERBLOCK_LOCATION);

		return transaction_manager::ptr(new transaction_manager(bm, core_sm));
	}

	// Parses |str| as an unsigned 64 bit integer.
	//
	// |desc| is a human readable name for the value being parsed; it is
	// only used to build the error message.  If the string cannot be
	// converted the error is reported on stderr and the process exits
	// with status 1, so this function only returns on success.
	uint64_t parse_int(string const &str, string const &desc) {
		try {
			return boost::lexical_cast<uint64_t>(str);

		} catch (...) {
			// The message used to be formatted into a string stream that
			// was never emitted, leaving the user with a silent exit.
			cerr << "Couldn't parse " << desc << ": '" << str << "'" << endl;
			exit(1);
		}

		return 0; // never get here
	}

	//--------------------------------

	struct flags {
		flags()
			: cache_mem(64 * 1024 * 1024),
			  content_based_chunks(false) {
		}

		string data_dev;
		optional<string> metadata_dev;
		optional<unsigned> block_size;
		unsigned cache_mem;
		bool content_based_chunks;
	};

	using namespace mapping_tree_detail;

	// Running totals of duplicated data, with all-zero data tracked
	// separately from every other kind of duplicate.
	class duplicate_counter {
	public:
		duplicate_counter()
		: non_zero_dups_(0),
		  zero_dups_(0) {
		}

		// Records |len| more units of duplicated, non-zero data.
		void add_duplicate(block_address len) {
			non_zero_dups_ = non_zero_dups_ + len;
		}

		// Records |len| more units of all-zero data.
		void add_zero_duplicate(block_address len) {
			zero_dups_ = zero_dups_ + len;
		}

		// Sum of the zero and non-zero totals.
		block_address get_total() const {
			return get_non_zeroes() + get_zeroes();
		}

		block_address get_non_zeroes() const {
			return non_zero_dups_;
		}

		block_address get_zeroes() const {
			return zero_dups_;
		}

		// Prints a one line summary to stdout.  Every figure, including
		// the size of |stream|, is divided by 1024 * 1024 first.
		void display_results(chunk_stream const &stream) const {
			block_address const meg = 1024 * 1024;

			cout << "\n\n";
			cout << stream.size() / meg << "m examined, ";
			cout << non_zero_dups_ / meg << "m duplicates, ";
			cout << zero_dups_ / meg << "m zeroes\n";
		}

	private:
		block_address non_zero_dups_;
		block_address zero_dups_;
	};

	// Walks a chunk_stream and records how much of it is duplicated.
	//
	// Each chunk is either recognised as all zeroes, or fingerprinted
	// with SHA1.  A chunk whose fingerprint has been seen before is
	// counted as a duplicate; otherwise the fingerprint is remembered
	// together with the chunk's offset.  Every all-zero chunk is counted
	// as a zero duplicate, including the first one encountered.
	//
	// Totals accumulate in a duplicate_counter, which is printed at the
	// end of each scan and is also available through get_results().
	class duplicate_detector {
	public:
		// Wraps |stream| in a variable_chunk_stream (constructed with a
		// parameter of 4096) and scans the result.
		void scan_with_variable_sized_chunks(chunk_stream &stream) {
			variable_chunk_stream vstream(stream, 4096);
			scan(vstream);
		}

		// Wraps |stream| in a fixed_chunk_stream built with |chunk_size|
		// and scans the result.
		void scan_with_fixed_sized_chunks(chunk_stream &stream, block_address chunk_size) {
			fixed_chunk_stream fstream(stream, chunk_size);
			scan(fstream);
		}

		// Totals gathered by every scan run on this detector so far.
		duplicate_counter const &get_results() const {
			return results_;
		}

	private:
		// Examines every chunk of |stream|, updating a progress bar as it
		// goes, then prints the summary.
		void scan(chunk_stream &stream) {
			block_address total_seen(0);
			unique_ptr<progress_monitor> pbar = create_progress_bar("Examining data");

			do {
				// FIXME: use a wrapper class to automate the put()
				chunk const &c = stream.get();
				examine(c);
				stream.put(c);

				total_seen += c.len_;

				// Guard against a stream whose size() is zero so the
				// percentage calculation never divides by zero.
				if (stream.size())
					pbar->update_percent((total_seen * 100) / stream.size());

			} while (stream.next());

			pbar->update_percent(100);
			results_.display_results(stream);
		}

		// Classifies a single chunk and updates the totals.
		void examine(chunk const &c) {
			if (all_zeroes(c)) {
				results_.add_zero_duplicate(c.len_);
				return;
			}

			digestor_.reset();
			digestor_.process_bytes(c.mem_.begin, c.mem_.end - c.mem_.begin);

			unsigned int digest[5];
			digestor_.get_digest(digest);

			// A raw array cannot be used as a map key, so the digest
			// words are copied into a vector.
			vector<unsigned int> const key(digest, digest + 5);

			// insert() leaves an existing entry untouched and reports
			// whether the key was new, so a single lookup covers both
			// the "first sighting" and the "duplicate" case.
			if (!fm_.insert(make_pair(key, c.offset_)).second)
				results_.add_duplicate(c.len_);
		}

		// True if every byte in the chunk's memory range is zero.
		bool all_zeroes(chunk const &c) const {
			for (uint8_t *ptr = c.mem_.begin; ptr != c.mem_.end; ++ptr) {
				if (*ptr != 0)
					return false;
			}

			return true;
		}

		// Maps a SHA1 digest to the offset of the first chunk that
		// produced it.
		typedef map<vector<unsigned int>, block_address> fingerprint_map;

		boost::uuids::detail::sha1 digestor_;
		fingerprint_map fm_;
		duplicate_counter results_;
	};

	// Scans the data device of a thin pool for duplicated chunks.
	//
	// fs.metadata_dev must be set; it is opened read-only and its
	// superblock supplies the pool's data block size (the superblock
	// value is multiplied by 512, presumably converting sectors to
	// bytes).  The data device is read through a cache_stream wrapped in
	// a pool_stream.
	//
	// With fixed sized chunking a non-zero fs.block_size overrides the
	// pool block size, provided it divides it exactly; otherwise a
	// runtime_error is thrown.  An unset or zero fs.block_size means the
	// pool block size is used.
	//
	// Always returns 0; failures are reported via exceptions.
	int show_dups_pool(flags const &fs) {
		block_manager<>::ptr bm = open_bm(*fs.metadata_dev);
		transaction_manager::ptr tm = open_tm(bm);
		superblock_detail::superblock sb = read_superblock(bm);
		block_address block_size = sb.data_block_size_ * 512;
		block_address nr_blocks = get_nr_blocks(fs.data_dev, block_size);

		cache_stream stream(fs.data_dev, block_size, fs.cache_mem);
		pool_stream pstream(stream, tm, sb, nr_blocks);

		duplicate_detector detector;

		if (fs.content_based_chunks)
			detector.scan_with_variable_sized_chunks(pstream);
		else {
			// Check the optional is engaged before dereferencing it:
			// --block-sectors is not required when a metadata device is
			// given, so block_size is frequently unset here.
			if (fs.block_size && *fs.block_size) {
				if (factor_of(*fs.block_size, block_size))
					block_size = *fs.block_size;
				else
					throw runtime_error("specified block size is not a factor of the pool chunk size\n");
			}

			detector.scan_with_fixed_sized_chunks(pstream, block_size);
		}

		return 0;
	}

	// Scans a plain (linear) device or file for duplicated chunks.
	//
	// fs.block_size must be set, otherwise a runtime_error is thrown.
	// The path, block count and block size are echoed to stderr before
	// scanning starts.  Always returns 0.
	int show_dups_linear(flags const &fs) {
		// FIXME: this check should be moved to the switch parsing
		if (!fs.block_size)
			throw runtime_error("--block-sectors or --metadata-dev must be supplied");

		block_address const block_size = *fs.block_size;
		block_address const nr_blocks = get_nr_blocks(fs.data_dev, *fs.block_size);

		cerr << "path = " << fs.data_dev << "\n";
		cerr << "nr_blocks = " << nr_blocks << "\n";
		cerr << "block size = " << block_size << "\n";

		cache_stream stream(fs.data_dev, block_size, fs.cache_mem);
		duplicate_detector dd;

		if (!fs.content_based_chunks)
			dd.scan_with_fixed_sized_chunks(stream, block_size);
		else
			dd.scan_with_variable_sized_chunks(stream);

		return 0;
	}

	// Dispatches to the pool scan when a metadata device was given, and
	// to the linear scan (after a notice on stderr) when it was not.
	int show_dups(flags const &fs) {
		if (!fs.metadata_dev) {
			cerr << "No metadata device provided, so treating data device as a linear device\n";
			return show_dups_linear(fs);
		}

		return show_dups_pool(fs);
	}
}

//----------------------------------------------------------------

// Initialises `command` with this tool's name, "thin_show_duplicates".
thin_show_duplicates_cmd::thin_show_duplicates_cmd()
	: command("thin_show_duplicates") {}

// Writes the usage summary for this command to |out| and flushes it.
void
thin_show_duplicates_cmd::usage(std::ostream &out) const
{
	out << "Usage: " << get_name() << " [options] {device|file}\n";
	out << "Options:\n";
	out << "  {--block-sectors} <integer>\n";
	out << "  {--content-based-chunks}\n";
	out << "  {--metadata-dev} <path>\n";
	out << "  {-h|--help}\n";
	out << "  {-V|--version}" << endl;
}

// Entry point for the command: parses the switches, then runs the scan.
//
// Returns 0 after printing help or the version, 1 on a usage error
// (unknown switch, block size that does not fit, missing data device),
// and otherwise whatever show_dups() returns.  Exceptions thrown by
// show_dups() propagate to the caller.
int
thin_show_duplicates_cmd::run(int argc, char **argv)
{
	int c;
	flags fs;

	// 'q' is accepted by getopt here but has no case below, so -q ends
	// up in the default branch like any other unhandled switch.
	char const shortopts[] = "qhV";
	option const longopts[] = {
		{ "block-sectors", required_argument, NULL, 1},
		{ "content-based-chunks", no_argument, NULL, 2},
		{ "metadata-dev", required_argument, NULL, 3},
		{ "help", no_argument, NULL, 'h'},
		{ "version", no_argument, NULL, 'V'},
		{ NULL, no_argument, NULL, 0 }
	};

	while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
		switch(c) {
		case 'h':
			usage(cout);
			return 0;

		case 'V':
			cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
			return 0;

		case 1: {
			// The switch is given in 512 byte sectors but stored in
			// bytes as an unsigned.  Reject values that overflow the
			// multiplication or do not survive the narrowing, rather
			// than silently truncating them (possibly to zero).
			uint64_t const sectors = parse_int(optarg, "block sectors");
			uint64_t const bytes = 512 * sectors;
			unsigned const narrowed = static_cast<unsigned>(bytes);

			if (bytes / 512 != sectors || narrowed != bytes) {
				cerr << "Block size is too large: " << optarg << " sectors" << endl;
				return 1;
			}

			fs.block_size = narrowed;
			break;
		}

		case 2:
			fs.content_based_chunks = true;
			break;

		case 3:
			fs.metadata_dev = optarg;
			break;

		default:
			usage(cerr);
			return 1;
		}
	}

	if (argc == optind) {
		cerr << "No data device/file provided." << endl;
		usage(cerr);
		// Return rather than exit() so this matches the other usage
		// error path above.
		return 1;
	}

	fs.data_dev = argv[optind];

	return show_dups(fs);
}

//----------------------------------------------------------------