154 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			154 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| #include "gmock/gmock.h"
 | |
| 
 | |
| #include "base/rolling_hash.h"
 | |
| 
 | |
| using namespace base;
 | |
| using namespace boost;
 | |
| using namespace std;
 | |
| using namespace testing;
 | |
| 
 | |
| //----------------------------------------------------------------
 | |
| 
 | |
| namespace {
 | |
| 	class RollingHashTests : public Test {
 | |
| 	public:
 | |
| 		RollingHashTests()
 | |
| 			: window_size_(4096),
 | |
| 			  rhash_(window_size_) {
 | |
| 		}
 | |
| 
 | |
| 		typedef vector<uint8_t> bytes;
 | |
| 		bytes random_bytes(unsigned count) {
 | |
| 			bytes v(count, 0);
 | |
| 
 | |
| 			for (unsigned i = 0; i < count; i++)
 | |
| 				v[i] = random_byte();
 | |
| 
 | |
| 			return v;
 | |
| 		}
 | |
| 
 | |
| 		uint8_t random_byte() const {
 | |
| 			return random() % 256;
 | |
| 		}
 | |
| 
 | |
| 		void apply_bytes(bytes const &bs) {
 | |
| 			for (unsigned i = 0; i < bs.size(); i++)
 | |
| 				rhash_.step(bs[i]);
 | |
| 		}
 | |
| 
 | |
| 		unsigned window_size_;
 | |
| 		rolling_hash rhash_;
 | |
| 	};
 | |
| 
 | |
| 	class ContentBasedHashTests : public Test {
 | |
| 	public:
 | |
| 		ContentBasedHashTests()
 | |
| 			: window_size_(8192),
 | |
| 			  h_(window_size_) {
 | |
| 		}
 | |
| 
 | |
| 		typedef vector<uint8_t> bytes;
 | |
| 		bytes random_bytes(unsigned count) {
 | |
| 			bytes v(count, 0);
 | |
| 
 | |
| 			for (unsigned i = 0; i < count; i++)
 | |
| 				v[i] = random_byte();
 | |
| 
 | |
| 			return v;
 | |
| 		}
 | |
| 
 | |
| 		uint8_t random_byte() const {
 | |
| 			return random() % 256;
 | |
| 		}
 | |
| 
 | |
| 		unsigned window_size_;
 | |
| 		content_based_hash h_;
 | |
| 	};
 | |
| }
 | |
| 
 | |
| //----------------------------------------------------------------
 | |
| 
 | |
| TEST_F(RollingHashTests, ctr)
 | |
| {
 | |
| }
 | |
| 
 | |
| //--------------------------------
 | |
| 
 | |
| TEST_F(RollingHashTests, hash_changes)
 | |
| {
 | |
| 	bytes bs = random_bytes(window_size_ * 100);
 | |
| 
 | |
| 	uint32_t prev = rhash_.get_hash();
 | |
| 	for (unsigned i = 0; i < bs.size(); i++) {
 | |
| 		rhash_.step(bs[i]);
 | |
| 		ASSERT_NE(rhash_.get_hash(), prev);
 | |
| 		prev = rhash_.get_hash();
 | |
| 	}
 | |
| }
 | |
| 
 | |
| TEST_F(RollingHashTests, hash_repeats)
 | |
| {
 | |
| 	bytes bs = random_bytes(window_size_);
 | |
| 
 | |
| 	apply_bytes(bs);
 | |
| 	uint32_t h1 = rhash_.get_hash();
 | |
| 	apply_bytes(bs);
 | |
| 
 | |
| 	ASSERT_EQ(rhash_.get_hash(), h1);
 | |
| }
 | |
| 
 | |
| TEST_F(RollingHashTests, reset_is_deterministic)
 | |
| {
 | |
| 	uint8_t bytes[] = "lksdfuwerh,sdg";
 | |
| 
 | |
| 	for (unsigned i = 0; i < sizeof(bytes) - 1; i++)
 | |
| 		rhash_.step(bytes[i]);
 | |
| 
 | |
| 	uint32_t h1 = rhash_.get_hash();
 | |
| 
 | |
| 	rhash_.reset();
 | |
| 
 | |
| 	for (unsigned i = 0; i < sizeof(bytes) - 1; i++)
 | |
| 		rhash_.step(bytes[i]);
 | |
| 
 | |
| 	uint32_t h2 = rhash_.get_hash();
 | |
| 
 | |
| 	ASSERT_EQ(h1, h2);
 | |
| }
 | |
| 
 | |
| //----------------------------------------------------------------
 | |
| 
 | |
| TEST_F(ContentBasedHashTests, ctr)
 | |
| {
 | |
| }
 | |
| 
 | |
| TEST_F(ContentBasedHashTests, chunk_limits_respected)
 | |
| {
 | |
| 	unsigned min = 100000, max = 0;
 | |
| 
 | |
| 	bytes bs = random_bytes(1024 * 1024 * 100);
 | |
| 	vector<unsigned> counts(window_size_, 0);
 | |
| 
 | |
| 	for (unsigned i = 0; i < bs.size(); i++) {
 | |
| 		optional<unsigned> b = h_.step(bs[i]);
 | |
| 		if (b) {
 | |
| 			counts[*b]++;
 | |
| 
 | |
| 			if (*b < min)
 | |
| 				min = *b;
 | |
| 
 | |
| 			if (*b > max)
 | |
| 				max = *b;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| #if 1
 | |
| 	for (unsigned i = 0; i < counts.size(); i++)
 | |
| 		cerr << i << ": " << counts[i] << "\n";
 | |
| 
 | |
| 	cerr << "min: " << min << ", max: " << max << "\n";
 | |
| #endif
 | |
| }
 | |
| 
 | |
| //----------------------------------------------------------------
 |