-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhashtable.c
276 lines (219 loc) · 8.31 KB
/
hashtable.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#include <stdlib.h>
#include <string.h>
#include<stdio.h>
#include "hashtable.h"
#define RADIX_1 26
#define MOD_1 100000007
#define RADIX_2 27
#define MOD_2 100000033
#define HT_MINIMUM_SIZE 4 // minimum size hash table will shrink is 4
static ht* create_new_sized_ht(const int new_size){
ht* new_ht=malloc(sizeof(ht));
// new_ht->min_size=new_size;
new_ht->size=new_size;
new_ht->active_key_count=0;
new_ht->used_key_count=0;
new_ht->items=calloc(new_ht->size,sizeof(ht_item*));
// hashtable items are allocated with (no. of items times size of one item) memory
return new_ht;
}
ht* create_ht(){
return create_new_sized_ht(HT_MINIMUM_SIZE);
}
static ht_item* new_item(const char* key, const char* val ){ // will be invoked by internal functions
ht_item* item=malloc(sizeof(ht_item)); // allocate memory for a new field in hash table
item->key=strdup(key); // create a copy of the source and store it in another pointer
item->val=strdup(val);
item->soft_deleted=false;
return item;
}
static void delete_item (ht_item* item){ // will be called by internal functions
// to avoid any memeory leaks, or dangling pointers we will free the key and value and the item pointer.
free(item->key);
free(item->val);
free(item);
}
void ht_delete(ht* my_ht){
for(int i=0;i<my_ht->size;i++){ // iterate through all the keys and free all the key, value and item pointer.
ht_item* my_ht_item=my_ht->items[i];
if(my_ht_item){
delete_item(my_ht_item);
}
}
free(my_ht->items);
free(my_ht);
}
// hash function
static int calculate_hash(const char* s,int RADIX,int MOD){
int n=strlen(s);
long factor=1,val=0;
for(int i=n-1;i>=0;i--){
val=(val+((s[i]-'a')*factor)%MOD)%MOD;
factor=(factor*RADIX)%MOD;
}
return (int)val%MOD;
}
static int get_hash(const char* s, const int attempt, const int m ){
// if hash collides, make another attempt, and attempt count is a factor in determining hash.
// keep attempt strictly greater than 0.
const int hash1= calculate_hash(s, RADIX_1, MOD_1);
const int hash2= calculate_hash(s, RADIX_2, MOD_2);
return (hash1+ ((hash2+1)*attempt)) % m; // hash2 should not be zero because it will colapse the meaning of double hashing then, so add 1
}
static void ht_resize(ht* my_ht, const int new_size){ // consider changing the size of hashmap in the structure
// base size to ensure we dont completely destroy the table
if(new_size<HT_MINIMUM_SIZE)
return;
ht* new_ht=create_new_sized_ht(new_size);
for(int i=0;i<my_ht->size;i++){
ht_item* item=my_ht->items[i];
if(item!=NULL && item->soft_deleted==false){ // move the non-soft-deleted elements to the new table
ht_insert_key(new_ht,item->key,item->val);
}
}
// since we will be invoking resize from another function like insert (based upon the load factor)
// we need to modify the my_ht only
// so move the assignments too
// my_ht->min_size=new_ht->min_size;
my_ht->used_key_count=new_ht->used_key_count;
my_ht->active_key_count=new_ht->active_key_count;
// both counter will be same just after resize, since wthe soft_deleted_keys were removed while new insertion.
const int ht_size=my_ht->size;
my_ht->size=new_ht->size;
new_ht->size=ht_size;
ht_item** all_items=my_ht->items;
my_ht->items=new_ht->items;
new_ht->items=all_items;
ht_delete(new_ht); // after shifting size, and item pointers, delete the new_ht made, we are back with my_ht
}
void* ht_insert_key(ht* my_ht, char* key, char*val){
double load_factor=(1.0*my_ht->used_key_count)/my_ht->size;
if(load_factor>=0.5){
ht_resize(my_ht,my_ht->size*2); // increase the size of hash table by twice
}
ht_item* item=new_item(key,val);
int idx=get_hash(key, 1, my_ht->size);
ht_item* curr_match=my_ht->items[idx];
int i=2; // count for attempt
while(curr_match){ // matching hash already had an element in it
if(strcmp(curr_match->key,key)==0){ // same key is begin inserted twice
//replacing the value.
curr_match->val=val;
return NULL;
}
idx=get_hash(key, i, my_ht->size);
curr_match=my_ht->items[idx];
i++;
}
my_ht->items[idx]=item;
my_ht->active_key_count++;
my_ht->used_key_count++;
return NULL;
}
char* ht_search_key(ht* my_ht, const char* key){
int idx=get_hash(key, 0, my_ht->size);
ht_item* curr_match=my_ht->items[idx];
int i=1;
while(curr_match){
if(curr_match->soft_deleted==false){
if(strcmp(key,curr_match->key)==0) // explictly check if keys are same
return curr_match->val;
}
idx=get_hash(key, i, my_ht->size); // probe the wole array
curr_match=my_ht->items[idx];
i++;
}
return NULL;
}
static void soft_delete_item (ht_item* item){ // dont free pointers yet.
item->soft_deleted=true;
}
void* ht_soft_delete_key(ht* my_ht, const char* key){
int idx=get_hash(key, 1, my_ht->size);
ht_item* curr_match=my_ht->items[idx];
int i=1;
while(curr_match){
if(curr_match->soft_deleted==false){
if(strcmp(key,curr_match->key)==0){ // explictly check if keys are same
soft_delete_item(curr_match);
my_ht->active_key_count--;
break;
}
}
idx=get_hash(key, i, my_ht->size); // otherwise probe the whole array
curr_match=my_ht->items[idx];
i++;
}
// used_key_count doesnt changes in soft_delete, how will our factor decrease than ?
// we need to have a threshold before we can shrink the array.
// using an effective_load_factor approach ( because it depends on the hash function how many less collisions we have which implies how many less soft_deletes we will have )
// might not be perfect, need to look further.
double effective_load_factor = (my_ht->active_key_count + 0.25 * my_ht->used_key_count) / my_ht->size;
if(my_ht->active_key_count==0){ // all elements were deleted
printf("No elements in the hash table\n");
printf("Resizing to HT_MINIMUM_SIZE");
ht_resize(my_ht,HT_MINIMUM_SIZE);
return NULL;
}
if(effective_load_factor<0.25 && my_ht->size >= 2*HT_MINIMUM_SIZE){
printf("\nResizing.....\n");
ht_resize(my_ht,my_ht->size/2); // decrease the size of hash table by half
}
return NULL;
}
void print_ht(ht* my_ht){
printf("\n\nPrinting Hash Table\n");
printf("size: %d\n",my_ht->size);
printf("keys active: %d\n",my_ht->active_key_count);
printf("keys used: %d\n",my_ht->used_key_count);
printf("items in my_ht\n\n");
printf("key\t | \tvalue\t | \tsoft_deleted\n");
ht_item** items=my_ht->items;
if(items==NULL)
return;
for(int i=0;i<my_ht->size;i++){
if(items[i]!=NULL){
char* k=items[i]->key;
char* v=items[i]->val;
bool sd=items[i]->soft_deleted;
printf("%s\t\t %s\t\t %s\n",k ,v , sd?"true":"false");
printf("\n");
}
}
}
int main(){
ht* my_ht=create_ht();
char key[20],val[20];
int input;
while(1) {
printf("\n\nEnter 1 to insert a number, Enter 2 to delete, Enter 3 to Print\n\n");
scanf("%d", &input);
while(getchar() != '\n');
switch (input) {
case 1:
printf("Enter key and value to insert\n");
printf("Key: ");
fgets(key, sizeof(key), stdin);
key[strcspn(key, "\n")] = '\0';
printf("Value: ");
fgets(val, sizeof(val), stdin);
val[strcspn(val, "\n")] = '\0';
ht_insert_key(my_ht, key, val);
break;
case 2:
printf("Enter key to delete\n");
printf("Key: ");
fgets(key, sizeof(key), stdin);
key[strcspn(key, "\n")] = '\0';
ht_soft_delete_key(my_ht, key);
break;
case 3:
print_ht(my_ht);
break;
default:
printf("Invalid input, please enter 1, 2, or 3.\n");
break;
}
}
return 0;
}