Home
[This page is auto-generated; please prefer to read the article’s PDF Form.]
[Prev] [Up] Variations and an Implementation
Note that h1 and h2,i can be searched in any 𝜖-Universal classes. In above
discussion, we just considered classes with 𝜖 = 1∕TableSize, for both level-1 and
level-2 hashing. If we use any other 𝜖-Universal class for level-2 hashing, then the
condition of Theorem 1 in [3] needs to be evaluated with the corresponding 𝜖, to
find the new condition which ensures certain collision-free functions. Similarly, if
we use any other 𝜖-Universal class for level-1 hashing, the Corollary 4 in
[3] may give a different upper-bound for the sum ∑
i∈ℤm si2. The size
of table T would be dependent upon this sum, similar to what we saw
above.
The following source code in C demonstrates an implementation of this
method for keys of type “unsigned int”. The universal class used for selecting h1
is from example-1 mentioned in page-2 of [3], with m = n. The same
class type with same p but m = mi = si(si − 1) is used for selecting
functions h2,i. Depending upon the requirement (e.g. different key type), an
implementation can choose different universal classes for level-1 and level-2
functions.
Any symbol of the form ‘ab’ from the above discussion has been shown as
‘a{b}’ in the code comments.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define N 100 /* size of the set S of static-keys */
/* the keys can be any 32-bit integer, so choosing ‘p’ as the smallest prime
greater than 2^32-1 */
#define P 4294967311 /* 2^32 + 15 */
typedef unsigned int uint;
typedef struct object
{
uint id;
/* other data */
} object;
typedef struct hashtable
{
uint m;
uint a1, b1; /* for function h{1} */
uint a2[N], b2[N]; /* for functions h{2,i} */
object **T;
int T_offset[N+1];
int T_size;
} hashtable;
/* "id" of these N objects are all distinct, forming the static-keys set */
object objects[N];
hashtable ht;
int *ht_s;
/* function: h(k) = ((ak+b) mod p) mod TableSize */
#define hash(k, a, b, tsize) (((((unsigned long)a)*k + b) %(P)) %(tsize))
void construct_hashtable()
{
int i;
/* the size of each subset S{i} belonging to a level-1 slot; required
only during this method. */
int s[N];
srand(time(NULL));
ht_s = s;
ht.m = N;
/* search for level-1 hash-function h{1} */
search_h1();
/* offsets of tables T{i} in T; returns sum of T{i} sizes */
ht.T_size = calculate_offsets();
ht.T = malloc(ht.T_size*sizeof(object *));
memset(ht.T, 0, ht.T_size*sizeof(object *));
/* distribute input keys among subsets S{i}, using hash-function h{1} */
create_subsets_of_keys();
for(i = 0; i < ht.m; i++)
{
if(ht_s[i] < 2)
continue;
/* search for level-2 hash-function h{2,i} */
search_h2i(i);
/* hash subset S{i} to hashtable T{i}, using hash-function h{2,i} */
hash_subset_of_keys(i);
}
}
void search_h1()
{
int a, b;
while(1)
{
a = 1 + rand();
b = rand();
/* check if the randomly selected function satisfies the upper-bound
3N on the sum of s{i}*s{i} */
if(check_si_sqr_sum(a, b))
break;
}
ht.a1 = a;
ht.b1 = b;
}
int check_si_sqr_sum(uint a, uint b)
{
int i, h;
int si_sqr_sum = 0;
memset(ht_s, 0, N*sizeof(int));
for(i = 0; i < N; i++)
{
h = hash(objects[i].id, a, b, ht.m);
ht_s[h]++;
}
for(i = 0; i < N; i++)
si_sqr_sum += ht_s[i]*ht_s[i];
return si_sqr_sum < 3*N;
}
int calculate_offsets()
{
int i;
int lastsize = 0;
ht.T_offset[0] = 0;
for(i = 0; i < N; i++)
{
if(i > 0)
ht.T_offset[i] = ht.T_offset[i-1] + lastsize;
if(ht_s[i] > 1)
lastsize = ht_s[i]*(ht_s[i]-1);
else
lastsize = ht_s[i];
}
/* extra element T_offset[N] is for simplifying macro SIZE_T() */
ht.T_offset[N] = ht.T_offset[N-1] + lastsize;
return ht.T_offset[N];
}
void create_subsets_of_keys()
{
int i, h1, offset;
int current_size[N];
memset(current_size, 0, sizeof(current_size));
/* we can use the initial part of hashtable T{i} to store subset S{i},
till we hash its keys using level-2 hashing. */
for(i = 0; i < N; i++)
{
h1 = hash(objects[i].id, ht.a1, ht.b1, ht.m);
offset = ht.T_offset[h1];
ht.T[offset + current_size[h1]] = &objects[i];
current_size[h1]++;
}
}
void search_h2i(int i)
{
int a, b;
int si = ht_s[i];
int offset = ht.T_offset[i];
int tsize = si*(si-1);
/* the bitmap required for checking collision-free function */
char *buffer;
int buffer_size = (tsize/8) + 1;
char tmp;
/* Each subset S{i} is first stored in table T{i} (which is in T) at its
initial "si" elements, by create_subsets_of_keys(). Its keys are later
hashed by h{2,i} among the "si*(si-1)" elements of T{i} (if si >= 2).
For searching collision-free function for S{i}, we need a bitmap of
size tsize. For this, we utilize the CURRENTLY unused space of T{i},
which is after the index "offset+si-1". But for si=2, si*(si-1)=si,
so there is no such unused space. */
if(si == 2)
buffer = &tmp;
else
buffer = (char*) &(ht.T[offset+si]);
while(1)
{
memset(buffer, 0, buffer_size);
a = 1 + rand();
b = rand();
/* check if the randomly selected function is collision-free for
subset S{i} */
if(check_collision_free(i, a, b, buffer))
break;
}
ht.a2[i] = a;
ht.b2[i] = b;
if(si != 2)
memset(buffer, 0, buffer_size);
}
#define CHECK_BIT(arr, i) (arr[i/8] & (1 << (i%8)))
#define SET_BIT(arr, i) (arr[i/8] |= (1 << (i%8)))
int check_collision_free(int l1slot, uint a, uint b, char *buffer)
{
int i, h;
int offset = ht.T_offset[l1slot];
int si = ht_s[l1slot];
for(i = offset; i < offset + si; i++)
{
h = hash(ht.T[i]->id, a, b, (si*(si-1)));
if(CHECK_BIT(buffer, h))
return 0;
SET_BIT(buffer, h);
}
return 1;
}
void hash_subset_of_keys(int l1slot)
{
int i, h2, target;
int offset = ht.T_offset[l1slot];
int si = ht_s[l1slot];
uint a2 = ht.a2[l1slot], b2 = ht.b2[l1slot];
object *obj, *existing;
for(i = offset; i < offset + si; i++)
{
obj = ht.T[i];
ht.T[i] = NULL;
h2 = hash(obj->id, a2, b2, (si*(si-1)));
target = offset + h2;
existing = ht.T[target];
ht.T[target] = obj;
/* We had used the initial part of hashtable T{i} to store subset S{i},
and now we are hashing its keys across T{i}. So the "target" index
may contain a not-yet-hashed key of S{i}. */
if(existing)
{
ht.T[i] = existing;
i--;
}
}
}
/* size of T{i} can be inferred from offsets */
#define SIZE_T(i) (ht.T_offset[i+1] - ht.T_offset[i])
object *lookup(uint k)
{
int h1, h2, tsize, index;
object *obj;
h1 = hash(k, ht.a1, ht.b1, ht.m);
tsize = SIZE_T(h1);
if(tsize == 0)
return NULL;
index = ht.T_offset[h1];
/* no level-2 hashing was done for tsize=1 */
if(tsize >= 2)
{
h2 = hash(k, ht.a2[h1], ht.b2[h1], tsize);
index = index + h2;
}
obj = ht.T[index];
if(obj == NULL || obj->id != k)
return NULL;
return obj;
}
■
References
[1] M. L. Fredman, J. Komlós, E. Szemerédi. Storing a Sparse Table
with O(1) Worst Case Access Time. J. ACM, Vol 31 (3) (1984), 538–544.
[2] T. H. Cormen, C. E. Leiserson, R. L. Rivest, C. Stein. Introduction
to Algorithms, Third Edition. The MIT Press (2009).
[3] Nitin Verma. Universal Classes of Hash Functions.
https://mathsanew.com/articles/
universal_classes_hash_functions.pdf (2021).
[Prev] [Up]
Copyright © 2021 Nitin Verma. All rights reserved.