因为别人知道源码怎么实现的,故意构造相同的hashset源代码的字符串进行攻击,怎么处理

The page is temporarily unavailable
nginx error!
The page you are looking for is temporarily unavailable.
Please try again later.
Website Administrator
Something has triggered an error on your
This is the default error page for
nginx that is distributed with
It is located
/usr/share/nginx/html/50x.html
You should customize this error page for your own
site or edit the error_page directive in
the nginx configuration file
/etc/nginx/nginx.conf.[100分]寻找哈希值一样的字符串
[问题点数:100分,结帖人CsToD]
[100分]寻找哈希值一样的字符串
[问题点数:100分,结帖人CsToD]
不显示删除回复
显示所有回复
显示星级回复
显示得分回复
只显示楼主
2007年5月 总版技术专家分月排行榜第一
2008年6月 总版技术专家分月排行榜第二2007年6月 总版技术专家分月排行榜第二
本帖子已过去太久远了,不再提供回复功能。哈希表(实验报告附C++源码)_百度文库
两大类热门资源免费畅读
续费一年阅读会员,立省24元!
哈希表(实验报告附C++源码)
上传于||文档简介
&&即​散​列​表​。​使​用​散​列​函​数​,​实​现​开​散​列​。​问​题​背​景​是​一​串​书​名​,​更​具​书​名​,​确​定​槽​位​置​。
阅读已结束,如果下载本文需要使用2下载券
想免费下载本文?
定制HR最喜欢的简历
下载文档到电脑,查找使用更方便
还剩4页未读,继续阅读
定制HR最喜欢的简历
你可能喜欢当前位置:
(暂无评分)
&Loading ...
hash查找因为其O(1)的查找性能而著称,被对查找性能要求高的应用所广泛采用。它的基本思想是:
(1) 创建一个定长的线性Hash表,一般可以初始化时指定
(2) 设计Hash函数,将关键字key散射到Hash表中。其中hash函数设计是最为关键的,均匀分布、冲突概率小全在它;
(3) 通常采用拉链方法来解决hash冲突问题,即散射到同一个hash表项的关键字,以链表形式来表示(也称为桶backet);
(4) 给定关键字key,就可以在O(1) + O(m)的时间复杂度内定位到目标。其中,m为拉链长度,即桶深。
Hash应用中,字符串是最为常见的关键字,应用非常普通,现在的程序设计语言中基本上都提供了字符串hash表的支持。字符串hash函数非常多,常见的主要有Simple_hash, RS_hash, JS_hash, PJW_hash, ELF_hash, BKDR_hash, SDBM_hash, DJB_hash, AP_hash, CRC_hash等。它们的C语言实现见后面附录代码: hash.h, hash.c。那么这么些字符串hash函数,谁好熟非呢?评估hash函数优劣的基准主要有以下两个指标:
(1) 散列分布性
即桶的使用率backet_usage = (已使用桶数) / (总的桶数),这个比例越高,说明分布性良好,是好的hash设计。
(2) 平均桶长
即avg_backet_len,所有已使用桶的平均长度。理想状态下这个值应该=1,越小说明冲突发生地越少,是好的hash设计。
hash函数计算一般都非常简洁,因此在耗费计算时间复杂性方面判别甚微,这里不作对比。
评估方案设计是这样的:
(1) 以200M的视频文件作为输入源,以4KB的块为大小计算MD5值,并以此作为hash关键字;
(2) 分别应用上面提到的各种字符串hash函数,进行hash散列模拟;
(3) 统计结果,用散列分布性和平均桶长两个指标进行评估分析。
测试程序见附录代码hashtest.c,测试结果如下表所示。从这个结果我们也可以看出,这些字符串hash函数真是不相仲伯,难以决出高低,所以实际应用中可以根据喜好选择。当然,最好实际测试一下,毕竟应用特点不大相同。其他几组测试结果也类似,这里不再给出。
Hash函数 桶数 Hash调用总数 最大桶长 平均桶长 桶使用率%
simple_hash 10240 47198 16 4.63 99.00%
RS_hash 10240 47198 16 4.63 98.91%
JS_hash 10240 47198 15 4.64 98.87%
PJW_hash 10240 47198 16 4.63 99.00%
ELF_hash 10240 47198 16 4.63 99.00%
BKDR_hash 10240 47198 16 4.63 99.00%
SDBM_hash 10240 47198 16 4.63 98.90%
DJB_hash 10240 47198 15 4.64 98.85%
AP_hash 10240 47198 16 4.63 98.96%
CRC_hash 10240 47198 16 4.64 98.77%
附录源代码:
#ifndef _HASH_H
#define _HASH_H
#ifdef __cplusplus
extern &C& {
/* A Simple Hash Function */
unsigned int simple_hash(char *str);
/* RS Hash Function */
unsigned int RS_hash(char *str);
/* JS Hash Function */
unsigned int JS_hash(char *str);
/* P. J. Weinberger Hash Function */
unsigned int PJW_hash(char *str);
/* ELF Hash Function */
unsigned int ELF_hash(char *str);
/* BKDR Hash Function */
unsigned int BKDR_hash(char *str);
/* SDBM Hash Function */
unsigned int SDBM_hash(char *str);
/* DJB Hash Function */
unsigned int DJB_hash(char *str);
/* AP Hash Function */
unsigned int AP_hash(char *str);
/* CRC Hash Function */
unsigned int CRC_hash(char *str);
#ifdef __cplusplus
#include &string.h&
#include &hash.h&
/* A Simple Hash Function */
unsigned int simple_hash(char *str)
register unsigned int hash;
register unsigned char *p;
for(hash = 0, p = (unsigned char *)str; *p ; p++)
hash = 31 * hash + *p;
return (hash & 0x7FFFFFFF);
/* RS Hash Function */
unsigned int RS_hash(char *str)
unsigned int b = 378551;
unsigned int a = 63689;
unsigned int hash = 0;
while (*str)
hash = hash * a + (*str++);
return (hash & 0x7FFFFFFF);
/* JS Hash Function */
unsigned int JS_hash(char *str)
unsigned int hash = ;
while (*str)
hash ^= ((hash && 5) + (*str++) + (hash && 2));
return (hash & 0x7FFFFFFF);
/* P. J. Weinberger Hash Function */
unsigned int PJW_hash(char *str)
unsigned int BitsInUnignedInt = (unsigned int)(sizeof(unsigned int) * 8);
unsigned int ThreeQuarters
= (unsigned int)((BitsInUnignedInt
* 3) / 4);
unsigned int OneEighth
= (unsigned int)(BitsInUnignedInt / 8);
unsigned int HighBits
= (unsigned int)(0xFFFFFFFF) && (BitsInUnignedInt - OneEighth);
unsigned int hash
unsigned int test
while (*str)
hash = (hash && OneEighth) + (*str++);
if ((test = hash & HighBits) != 0)
hash = ((hash ^ (test && ThreeQuarters)) & (~HighBits));
return (hash & 0x7FFFFFFF);
/* ELF Hash Function */
unsigned int ELF_hash(char *str)
unsigned int hash = 0;
unsigned int x
while (*str)
hash = (hash && 4) + (*str++);
if ((x = hash & 0xF0000000L) != 0)
hash ^= (x && 24);
hash &= ~x;
return (hash & 0x7FFFFFFF);
/* BKDR Hash Function */
unsigned int BKDR_hash(char *str)
unsigned int seed = 131; // 31 131
131313 etc..
unsigned int hash = 0;
while (*str)
hash = hash * seed + (*str++);
return (hash & 0x7FFFFFFF);
/* SDBM Hash Function */
unsigned int SDBM_hash(char *str)
unsigned int hash = 0;
while (*str)
hash = (*str++) + (hash && 6) + (hash && 16) - hash;
return (hash & 0x7FFFFFFF);
/* DJB Hash Function */
unsigned int DJB_hash(char *str)
unsigned int hash = 5381;
while (*str)
hash += (hash && 5) + (*str++);
return (hash & 0x7FFFFFFF);
/* AP Hash Function */
unsigned int AP_hash(char *str)
unsigned int hash = 0;
for (i=0; *str; i++)
if ((i & 1) == 0)
hash ^= ((hash && 7) ^ (*str++) ^ (hash && 3));
hash ^= (~((hash && 11) ^ (*str++) ^ (hash && 5)));
return (hash & 0x7FFFFFFF);
/* CRC Hash Function */
unsigned int CRC_hash(char *str)
unsigned int
= strlen(str);
unsigned long long
unsigned short int *w
= (unsigned short int *)str;
unsigned short int
* Our algorithm is simple, using a 32 bit accumulator (sum), we add
* sequential 16 bit words to it, and at the end, fold back all the
* carry bits from the top 16 bits into the lower 16 bits.
while ( nleft & 1 ) {
sum += *w++;
nleft -= 2;
* mop up an odd byte, if necessary
if ( 1 == nleft ) {
*( unsigned char * )( &answer ) = *( unsigned char * )w ;
sum += answer;
* add back carry outs from top 16 bits to low 16 bits
* add hi 16 to low 16
sum = ( sum && 16 ) + ( sum & 0xFFFF );
/* add carry */
sum += ( sum && 16 );
/* truncate to 16 bits */
answer = ~sum;
return (answer & 0xFFFFFFFF);
hashtest.c
#include &stdio.h&
#include &stdlib.h&
#include &sys/types.h&
#include &sys/stat.h&
#include &fcntl.h&
#include &errno.h&
#include &string.h&
#include &hash.h&
#include &md5.h&
struct hash_key {
unsigned char *key;
struct hash_key *next;
struct hash_counter_entry {
unsigned int hit_count;
unsigned int entry_count;
struct hash_key *keys;
#define BLOCK_LEN
static int backet_len = 10240;
static int hash_call_count = 0;
static struct hash_counter_entry *hlist = NULL;
unsigned int (*hash_func)(char *str);
void choose_hash_func(char *hash_func_name)
if (0 == strcmp(hash_func_name, &simple_hash&))
hash_func = simple_hash;
else if (0 == strcmp(hash_func_name, &RS_hash&))
hash_func = RS_hash;
else if (0 == strcmp(hash_func_name, &JS_hash&))
hash_func = JS_hash;
else if (0 == strcmp(hash_func_name, &PJW_hash&))
hash_func = PJW_hash;
else if (0 == strcmp(hash_func_name, &ELF_hash&))
hash_func = ELF_hash;
else if (0 == strcmp(hash_func_name, &BKDR_hash&))
hash_func = BKDR_hash;
else if (0 == strcmp(hash_func_name, &SDBM_hash&))
hash_func = SDBM_hash;
else if (0 == strcmp(hash_func_name, &DJB_hash&))
hash_func = DJB_hash;
else if (0 == strcmp(hash_func_name, &AP_hash&))
hash_func = AP_hash;
else if (0 == strcmp(hash_func_name, &CRC_hash&))
hash_func = CRC_hash;
hash_func = NULL;
void insert_hash_entry(unsigned char *key, struct hash_counter_entry *hlist)
unsigned int hash_value = hash_func(key) % backet_len;
struct hash_key *p;
p = hlist[hash_value].keys;
while(p) {
if (0 == strcmp(key, p-&key))
p = p-&next;
if (p == NULL)
p = (struct hash_key *)malloc(sizeof(struct hash_key));
if (p == NULL)
perror(&malloc in insert_hash_entry&);
p-&key = strdup(key);
p-&next = hlist[hash_value].keys;
hlist[hash_value].keys = p;
hlist[hash_value].entry_count++;
hlist[hash_value].hit_count++;
void hashtest_init()
hash_call_count = 0;
hlist = (struct hash_counter_entry *) malloc (sizeof(struct hash_counter_entry) *
backet_len);
if (NULL == hlist)
perror(&malloc in hashtest_init&);
for (i = 0; i & backet_len; i++)
hlist[i].hit_count = 0;
hlist[i].entry_count = 0;
hlist[i].keys = NULL;
void hashtest_clean()
struct hash_key *pentry, *p;
if (NULL == hlist)
for (i = 0; i & backet_len; ++i)
pentry = hlist[i].keys;
while(pentry)
p = pentry-&next;
if (pentry-&key) free(pentry-&key);
free(pentry);
pentry = p;
free(hlist);
void show_hashtest_result()
int i, backet = 0, max_link = 0, sum = 0;
int conflict_count = 0, hit_count = 0;
float avg_link, backet_usage;
for(i = 0; i & backet_len; i++)
if (hlist[i].hit_count & 0)
sum += hlist[i].entry_count;
if (hlist[i].entry_count & max_link)
max_link = hlist[i].entry_count;
if (hlist[i].entry_count & 1)
conflict_count++;
hit_count += hlist[i].hit_count;
backet_usage = backet/1.0/backet_len * 100;;
avg_link = sum/1.0/backet;
printf(&backet_len = %d/n&, backet_len);
printf(&hash_call_count = %d/n&, hash_call_count);
printf(&hit_count = %d/n&, hit_count);
printf(&conflict count = %d/n&, conflict_count);
printf(&longest hash entry = %d/n&, max_link);
printf(&average hash entry length = %.2f/n&, avg_link);
printf(&backet usage = %.2f%/n&, backet_usage);
void usage()
printf(&Usage: hashtest filename hash_func_name [backet_len]/n&);
printf(&hash_func_name:/n&);
printf(&/tsimple_hash/n&);
printf(&/tRS_hash/n&);
printf(&/tJS_hash/n&);
printf(&/tPJW_hash/n&);
printf(&/tELF_hash/n&);
printf(&/tBKDR_hash/n&);
printf(&/tSDBM_hash/n&);
printf(&/tDJB_hash/n&);
printf(&/tAP_hash/n&);
printf(&/tCRC_hash/n&);
void md5_to_32(unsigned char *md5_16, unsigned char *md5_32)
for (i = 0; i & 16; ++i)
sprintf(md5_32 + i * 2, &%02x&, md5_16[i]);
int main(int argc, char *argv[])
int fd = -1, rwsize = 0;
unsigned char md5_checksum[16 + 1] = {0};
unsigned char buf[BLOCK_LEN] = {0};
if (argc & 3)
usage();
return -1;
if (-1 == (fd = open(argv[1], O_RDONLY)))
perror(&open source file&);
return errno;
if (argc == 4)
backet_len = atoi(argv[3]);
hashtest_init();
choose_hash_func(argv[2]);
while (rwsize = read(fd, buf, BLOCK_LEN))
md5(buf, rwsize, md5_checksum);
insert_hash_entry(md5_checksum, hlist);
hash_call_count++;
memset(buf, 0, BLOCK_LEN);
memset(md5_checksum, 0, 16 + 1);
close(fd);
show_hashtest_result();
hashtest_clean();
本文固定链接:
【上一篇】【下一篇】
您可能还会对这些文章感兴趣!

我要回帖

更多关于 构造hash函数的方法 的文章

 

随机推荐