-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.c
157 lines (143 loc) · 3.56 KB
/
benchmark.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include <time.h>
#include "rhmap.h"
RHMAP_DECLARE(map, char *)
/*
* I was experimenting with hash functions and found this.
* It seems weirdly good, based on my testing:
* No collisions on 654k English words
* No collisions on all combinations of 1 or 2-byte strings
* No collisions on numeric strings 0-10000000 (10M)
* Superior average search distance to djb2
* Speed within margin of error from djb2
*
* TODO: Test further. There must be a reason nobody uses this.
* If there are issues, maybe a better seed value would help.
*/
size_t hash(const char *str, size_t n)
{
// mine
size_t k = 52711;
while (n --> 0)
k = k * k + *str++;
return k;
/*
// djb2
size_t k = 5381;
int c;
while ((c = *str++))
k = ((k << 5) + k) + c;
return k;
*/
}
size_t hash_str(char *s)
{
return hash(s, strlen(s));
}
void chomp(char *s)
{
char *nl = strchr(s, '\n');
if (nl != NULL)
*nl = '\0';
}
double avg_dist(struct map *m)
{
size_t i, n = 0;
double dist = 0.0;
for (i = 0; i < m->capacity; i++) {
struct map_bucket *b = &m->buckets[i];
if (b->key == RHMAP_EMPTY || b->key == RHMAP_TOMB)
continue;
dist += m->buckets[i].distance;
n++;
}
return dist/n;
}
double std_dist(struct map *m, double mean)
{
size_t i, n = 0;
double sq_dist = 0.0;
for (i = 0; i < m->capacity; i++) {
struct map_bucket *b = &m->buckets[i];
double dist;
if (b->key == RHMAP_EMPTY || b->key == RHMAP_TOMB)
continue;
dist = m->buckets[i].distance - mean;
sq_dist += dist * dist;
n++;
}
return sqrt(sq_dist/n);
}
double timer_us()
{
// I'm accepting some risk of losing precision here for the sake of simplicity
static clock_t then = 0;
clock_t now = clock();
double diff = (((double)(now - then)) * 1000000) / CLOCKS_PER_SEC;
then = now;
return diff;
}
int main(int argc, char **argv)
{
// Usage: cat file | ./a.out
struct map m;
char buf[200];
double mean_dist;
size_t map_size = 1000000;
struct map_bucket *map_mem;
double hash_time = 0.0;
double insert_time = 0.0;
double search_time = 0.0;
size_t num_entries = 0;
size_t num_collisions = 0;
if (argc > 1)
sscanf(argv[1], "%lu", &map_size);
map_mem = malloc(map_size * sizeof(*map_mem));
map_init(&m, map_mem, map_size * sizeof(*map_mem));
while (fgets(buf, sizeof(buf), stdin) != NULL) {
char **str_ptr;
size_t hash;
chomp(buf);
timer_us();
hash = hash_str(buf);
hash_time += timer_us();
str_ptr = map_search(&m, hash);
if (str_ptr != NULL) {
if (strcmp(buf, *str_ptr) != 0) {
printf("Collision: \"%s\" and \"%s\"\n", buf, *str_ptr);
num_collisions++;
free(*map_remove(&m, hash));
}
}
timer_us();
str_ptr = map_insert(&m, hash, strdup(buf));
insert_time += timer_us();
if (str_ptr == NULL) {
printf("Hash table full\n");
break;
}
timer_us();
str_ptr = map_search(&m, hash);
search_time += timer_us();
num_entries++;
}
mean_dist = avg_dist(&m);
printf("Map size: %lu\n", map_size);
printf("Number of entries: %lu\n", num_entries);
printf("Number of hash collisions: %lu\n", num_collisions);
putchar('\n');
printf("Average hash time (us): %f\n", hash_time / num_entries);
printf("Average insertion time (us): %f\n", insert_time / num_entries);
printf("Average search time (us): %f\n", search_time / num_entries);
putchar('\n');
printf("Max distance: %ld\n", m.max_distance);
printf("Average distance: %f\n", mean_dist);
printf("Standard deviation of distance: %f\n", std_dist(&m, mean_dist));
map_clear(&m, (void (*)(char *))free);
free(map_mem);
return 0;
}