Re: Question about tcp hash function tcp_hashfn()
- From: Evgeniy Polyakov <johnpol@xxxxxxxxxxx>
- Date: Thu, 1 Jun 2006 10:04:24 +0400
On Wed, May 31, 2006 at 11:41:27AM -0700, David Miller (davem@xxxxxxxxxxxxx) wrote:
Worse: he folded the jenkins algorith result with
h ^= h >> 16;
h ^= h >> 8;
Destroying the coverage of the function.
It was done to simulate socket code which uses the same folding.
Leaving 32bit space is just wrong, consider hash table size with that
index.
You absolutely show not do this shifting on the jenkins hash
result, you destroy the distribution entirely. Just mask
it with the hash mask and that's all you need to do.
Brian is right, this is absolutely critical to using the Jenkins hash
correctly. You're "unmixing" the bits it worked so hard to mix.
That is wrong. And I have a code and picture to show that,
and you dont - prove me wrong :)
Attached code and picture which shows _exactly_ the same distribution for
folded and not folded Jenkins hash distribution, and it's artifact
compared to XOR hash.
Fairly distributed values being XORed produce still fairly distributed
value.
--
Evgeniy Polyakov
Attachment:
hash_comparison.png
Description: PNG image
#include <stdio.h>
#include <errno.h>
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;
typedef unsigned int __u32;
typedef unsigned short __u16;
typedef unsigned char __u8;
#include "jhash.h"
struct hash_entry
{
unsigned long counter;
};
static inline num2ip(__u8 a1, __u8 a2, __u8 a3, __u8 a4)
{
__u32 a = 0;
a |= a1;
a << 8;
a |= a2;
a << 8;
a |= a3;
a << 8;
a |= a4;
return a;
}
static inline __u8 get_random_byte(void)
{
return 1 + (int) (255.0 * (rand() / (RAND_MAX + 1.0)));
}
static inline __u16 get_random_word(void)
{
return 1 + (int) (65536.0 * (rand() / (RAND_MAX + 1.0)));
}
unsigned int hash_addr(__u32 faddr, __u16 fport, __u32 laddr, __u16 lport)
{
unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
h ^= h >> 16;
h ^= h >> 8;
return h;
}
unsigned int hash_addr1(__u32 faddr, __u16 fport, __u32 laddr, __u16 lport)
{
u32 ports;
unsigned int h;
ports = lport;
ports <<= 16;
ports |= fport;
h = jhash_2words(faddr, laddr, ports);
//h ^= h >> 16;
//h ^= h >> 8;
return h;
}
int main()
{
struct hash_entry *table;
__u32 saddr, daddr;
__u16 sport, dport;
unsigned int hash, i, *results;
unsigned int hash_size = 65536, iter_num = 100;
table = malloc(hash_size * sizeof(struct hash_entry));
if (!table)
return -ENOMEM;
results = malloc(hash_size * sizeof(unsigned int));
if (!results)
return -ENOMEM;
for (i=0; i<hash_size; ++i) {
results[i] = 0;
table[i].counter = 0;
}
srand(time(NULL));
daddr = num2ip(192, 168, 0, 1);
dport = htons(80);
for (i=0; i<hash_size*iter_num; ++i) {
saddr = num2ip(get_random_byte(), get_random_byte(), get_random_byte(), get_random_byte());
sport = get_random_word();
hash = hash_addr(saddr, sport, daddr, dport);
hash &= (hash_size - 1);
table[hash].counter++;
}
for (i=0; i<hash_size; ++i)
results[table[i].counter]++;
for (i=0; i<hash_size; ++i)
printf("%u %u\n", i, results[i]);
//printf("%u %u\n", i, table[i].counter);
}
- Follow-Ups:
- Re: Question about tcp hash function tcp_hashfn()
- From: Florian Weimer
- Re: Question about tcp hash function tcp_hashfn()
- From: Brian F. G. Bidulock
- Re: Question about tcp hash function tcp_hashfn()
- Prev by Date: Re: Why must NFS access metadata in synchronous mode?
- Next by Date: Re: Question about tcp hash function tcp_hashfn()
- Previous by thread: Re: [patch 00/61] ANNOUNCE: lock validator -V1
- Next by thread: Re: Question about tcp hash function tcp_hashfn()
- Index(es):
Relevant Pages
|