From 506b0a8a080799c219b3d7cc7890fe36b5efce88 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 4 Sep 2015 11:10:19 +0100
Subject: [PATCH] [thin_show_dups] inline some hash functions

---
 base/rolling_hash.cc | 71 +-------------------------------------------
 base/rolling_hash.h  | 60 +++++++++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 75 deletions(-)

diff --git a/base/rolling_hash.cc b/base/rolling_hash.cc
index 9c6e1bf..8de7ac3 100644
--- a/base/rolling_hash.cc
+++ b/base/rolling_hash.cc
@@ -2,15 +2,11 @@
 
 using namespace base;
 using namespace boost;
+using namespace hash_detail;
 using namespace std;
 
 //----------------------------------------------------------------
 
-namespace {
-	uint32_t MULTIPLIER = 4294967291UL;
-	uint32_t SEED = 123;
-}
-
 rolling_hash::rolling_hash(unsigned window_size)
 	: a_(MULTIPLIER),
 	  a_to_k_minus_1_(a_),
@@ -35,28 +31,6 @@ rolling_hash::reset()
 	}
 }
 
-uint32_t
-rolling_hash::step(uint8_t byte)
-{
-	update_hash(byte);
-	return hash_;
-}
-
-uint32_t
-rolling_hash::get_hash() const
-{
-	return hash_;
-}
-
-void
-rolling_hash::update_hash(uint8_t byte)
-{
-	hash_ -= a_to_k_minus_1_ * (chars_.front() + SEED);
-	chars_.pop_front();
-	chars_.push_back(byte);
-	hash_ = (hash_ * a_) + byte + SEED;
-}
-
 //--------------------------------
 
 content_based_hash::content_based_hash(unsigned window_size)
@@ -79,47 +53,4 @@ content_based_hash::reset()
 	rhash_.reset();
 }
 
-optional<unsigned>
-content_based_hash::step(uint8_t byte)
-{
-	optional<unsigned> r;
-
-	rhash_.step(byte);
-	len_++;
-
-	if (len_ < min_len_)
-		return r;
-
-	if (hit_break(backup_div_))
-		backup_break_ = len_;
-
-	if (hit_break(div_)) {
-		// found a break
-		r = len_;
-		len_ = 0;
-		backup_break_.reset();
-
-	} else if (len_ >= max_len_) {
-		// too big, is there a backup?
-		if (backup_break_) {
-			len_ -= *backup_break_;
-			r = backup_break_;
-			backup_break_.reset();
-
-		} else {
-			r = len_;
-			len_ = 0;
-		}
-	}
-
-	return r;
-}
-
-bool
-content_based_hash::hit_break(uint32_t mask) const
-{
-	uint32_t h = rhash_.get_hash() >> 8;
-	return !(h & mask);
-}
-
 //----------------------------------------------------------------
diff --git a/base/rolling_hash.h b/base/rolling_hash.h
index d44012a..c5fa44c 100644
--- a/base/rolling_hash.h
+++ b/base/rolling_hash.h
@@ -8,6 +8,11 @@
 //----------------------------------------------------------------
 
 namespace base {
+	namespace hash_detail {
+		uint32_t const MULTIPLIER = 4294967291UL;
+		uint32_t const SEED = 123;
+	}
+
 	class rolling_hash {
 	public:
 		rolling_hash(unsigned window_size);
@@ -15,12 +20,22 @@ namespace base {
 		void reset();
 
 		// Returns the current hash
-		uint32_t step(uint8_t byte);
+		uint32_t step(uint8_t byte) {
+			update_hash(byte);
+			return hash_;
+		}
 
-		uint32_t get_hash() const;
+		uint32_t get_hash() const {
+			return hash_;
+		}
 
 	private:
-		void update_hash(uint8_t byte);
+		void update_hash(uint8_t byte) {
+			hash_ -= a_to_k_minus_1_ * (chars_.front() + hash_detail::SEED);
+			chars_.pop_front();
+			chars_.push_back(byte);
+			hash_ = (hash_ * a_) + byte + hash_detail::SEED;
+		}
 
 		uint32_t a_;
 		uint32_t a_to_k_minus_1_;
@@ -38,10 +53,45 @@ namespace base {
 		void reset();
 
 		// Returns a break point relative to the last reset/break.
-		boost::optional<unsigned> step(uint8_t byte);
+		boost::optional<unsigned> step(uint8_t byte) {
+			boost::optional<unsigned> r;
+
+			rhash_.step(byte);
+			len_++;
+
+			if (len_ < min_len_)
+				return r;
+
+			if (hit_break(backup_div_))
+				backup_break_ = len_;
+
+			if (hit_break(div_)) {
+				// found a break
+				r = len_;
+				len_ = 0;
+				backup_break_.reset();
+
+			} else if (len_ >= max_len_) {
+				// too big, is there a backup?
+				if (backup_break_) {
+					len_ -= *backup_break_;
+					r = backup_break_;
+					backup_break_.reset();
+
+				} else {
+					r = len_;
+					len_ = 0;
+				}
+			}
+
+			return r;
+		}
 
 	private:
-		bool hit_break(uint32_t div) const;
+		bool hit_break(uint32_t mask) const {
+			uint32_t h = rhash_.get_hash() >> 8;
+			return !(h & mask);
+		}
 
 		rolling_hash rhash_;