Message ID | 513096A1.8020008@dlhnet.de |
---|---|
State | New |
Headers | show |
On 03/01/13 12:53, Peter Lieven wrote: > instead of a linear mapping we use a multiplicative hash > with the golden ratio to derive the cache bucket from the > address. this helps to reduce collisions if memory positions > are multiple of the cache size and it avoids a division > in the position calculation. > > Signed-off-by: Peter Lieven <pl@kamp.de> > --- > page_cache.c | 5 ++++- > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/page_cache.c b/page_cache.c > index 376f1db..45d769a 100644 > --- a/page_cache.c > +++ b/page_cache.c > @@ -24,6 +24,7 @@ > #include <strings.h> > > #include "qemu-common.h" > +#include "qemu/host-utils.h" > #include "migration/page_cache.h" > > #ifdef DEBUG_CACHE > @@ -48,6 +49,7 @@ struct PageCache { > int64_t max_num_items; > uint64_t max_item_age; > int64_t num_items; > + uint64_t hash_shift_bits; > }; > > PageCache *cache_init(int64_t num_pages, unsigned int page_size) > @@ -72,6 +74,7 @@ PageCache *cache_init(int64_t num_pages, unsigned int > page_size) > cache->num_items = 0; > cache->max_item_age = 0; > cache->max_num_items = num_pages; > + cache->hash_shift_bits = clz64(num_pages-1); > > DPRINTF("Setting cache buckets to %" PRId64 "\n", > cache->max_num_items); > > @@ -108,7 +111,7 @@ static size_t cache_get_cache_pos(const PageCache > *cache, > size_t pos; > > g_assert(cache->max_num_items); > - pos = (address / cache->page_size) & (cache->max_num_items - 1); > + pos = (address * 0x9e3779b97f4a7c13) >> cache->hash_shift_bits; > return pos; > } > According to <http://www.brpreiss.com/books/opus4/html/page214.html>, the multiplier "is chosen as the integer that is relatively prime to" 2^64 "which is closest to" (sqrt(5)-1)/2 * 2^64. (sqrt(5)-1)/2 * 2^64 ~= 11400714819323198485.86699842797038469120 hence the constant would be a=0x9e3779b97f4a7c15. Any reason why a-2 is used in the patch? (Note: this is not a review or any suggestion to change the patch; I'm just curious.) A google-fight between "a" and "a-2" is inconclusive. So is stackoverflow: http://stackoverflow.com/questions/4113278/64-bit-multiplicative-hashing http://stackoverflow.com/questions/8513911/how-to-create-a-good-hash-combine-with-64-bit-output-inspired-by-boosthash-co Thanks Laszlo
On 01.03.2013 13:50, Laszlo Ersek wrote: > On 03/01/13 12:53, Peter Lieven wrote: >> instead of a linear mapping we use a multiplicative hash >> with the golden ratio to derive the cache bucket from the >> address. this helps to reduce collisions if memory positions >> are multiple of the cache size and it avoids a division >> in the position calculation. >> >> Signed-off-by: Peter Lieven <pl@kamp.de> >> --- >> page_cache.c | 5 ++++- >> 1 file changed, 4 insertions(+), 1 deletion(-) >> >> diff --git a/page_cache.c b/page_cache.c >> index 376f1db..45d769a 100644 >> --- a/page_cache.c >> +++ b/page_cache.c >> @@ -24,6 +24,7 @@ >> #include <strings.h> >> >> #include "qemu-common.h" >> +#include "qemu/host-utils.h" >> #include "migration/page_cache.h" >> >> #ifdef DEBUG_CACHE >> @@ -48,6 +49,7 @@ struct PageCache { >> int64_t max_num_items; >> uint64_t max_item_age; >> int64_t num_items; >> + uint64_t hash_shift_bits; >> }; >> >> PageCache *cache_init(int64_t num_pages, unsigned int page_size) >> @@ -72,6 +74,7 @@ PageCache *cache_init(int64_t num_pages, unsigned int >> page_size) >> cache->num_items = 0; >> cache->max_item_age = 0; >> cache->max_num_items = num_pages; >> + cache->hash_shift_bits = clz64(num_pages-1); >> >> DPRINTF("Setting cache buckets to %" PRId64 "\n", >> cache->max_num_items); >> >> @@ -108,7 +111,7 @@ static size_t cache_get_cache_pos(const PageCache >> *cache, >> size_t pos; >> >> g_assert(cache->max_num_items); >> - pos = (address / cache->page_size) & (cache->max_num_items - 1); >> + pos = (address * 0x9e3779b97f4a7c13) >> cache->hash_shift_bits; >> return pos; >> } >> > > According to <http://www.brpreiss.com/books/opus4/html/page214.html>, > the multiplier "is chosen as the integer that is relatively prime to" > 2^64 "which is closest to" (sqrt(5)-1)/2 * 2^64. > > (sqrt(5)-1)/2 * 2^64 ~= 11400714819323198485.86699842797038469120 > > hence the constant would be a=0x9e3779b97f4a7c15. Any reason why a-2 is > used in the patch? no, actually I only googled this value and did not calculate it myself. Peter
diff --git a/page_cache.c b/page_cache.c index 376f1db..45d769a 100644 --- a/page_cache.c +++ b/page_cache.c @@ -24,6 +24,7 @@ #include <strings.h> #include "qemu-common.h" +#include "qemu/host-utils.h" #include "migration/page_cache.h" #ifdef DEBUG_CACHE @@ -48,6 +49,7 @@ struct PageCache { int64_t max_num_items; uint64_t max_item_age; int64_t num_items; + uint64_t hash_shift_bits; }; PageCache *cache_init(int64_t num_pages, unsigned int page_size) @@ -72,6 +74,7 @@ PageCache *cache_init(int64_t num_pages, unsigned int page_size) cache->num_items = 0; cache->max_item_age = 0; cache->max_num_items = num_pages; + cache->hash_shift_bits = clz64(num_pages-1); DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items); @@ -108,7 +111,7 @@ static size_t cache_get_cache_pos(const PageCache *cache, size_t pos; g_assert(cache->max_num_items); - pos = (address / cache->page_size) & (cache->max_num_items - 1); + pos = (address * 0x9e3779b97f4a7c13) >> cache->hash_shift_bits; return pos; }
instead of a linear mapping we use a multiplicative hash with the golden ratio to derive the cache bucket from the address. this helps to reduce collisions if memory positions are multiple of the cache size and it avoids a division in the position calculation. Signed-off-by: Peter Lieven <pl@kamp.de> --- page_cache.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)