pmemhackathon · PatKamin · Sep 13, 2021
diff --git a/examples/I/.gitignore b/examples/I/.gitignore
@@ -0,0 +1 @@
+hmat
diff --git a/examples/I/Makefile b/examples/I/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for hmat example
+#
+PROGS = hmat
+CFLAGS = -g -Wall -Werror -std=gnu99
+LIBS = -lmemkind
+
+all: $(PROGS)
+
+hmat: hashmap.o hmat.o
+	$(CC) -o $@ $(CFLAGS) $^ $(LIBS)
+
+# Both objects include hashmap.h, so rebuild them whenever it changes.
+hashmap.o hmat.o: hashmap.h
+
+clean:
+	$(RM) *.o core
+
+clobber: clean
+	$(RM) $(PROGS)
+
+.PHONY: all clean clobber
diff --git a/examples/I/README.txt b/examples/I/README.txt
@@ -0,0 +1,44 @@
+Allocation tiering based on memory-characteristics.
+
+Starting from Linux Kernel version 5.5, the NUMA nodes concept was enhanced with
+new memory attributes: latency, capacity and bandwidth. In heterogeneous memory
+systems, where there are many, potentially varied, memory NUMA nodes,
+applications can use these new attributes to dynamically choose the type of
+memory they request, depending on the expected access patterns of the allocated
+data.
+
+Note that in this example we don't explicitly say which kind of medium (e.g.
+DRAM or PMEM) we want to use, because the execution environment of an
+application can vary and might not have a specific memory type. Instead, we use
+the concept of "lowest latency" or "highest bandwidth" nodes that can be
+accessed from the CPU that runs our program. This concept was added in
+Memkind 1.11. To use it, the hwloc library must be installed and the system
+must run kernel >= 5.5. For more information about memory attributes, see the
+https://pmem.io/2021/05/12/hmat_memkind.html article on the PMEM.io blog.
+
+In this example we use the MEMKIND_LOWEST_LATENCY_LOCAL and
+MEMKIND_HIGHEST_CAPACITY memory kinds to allocate a simple array of data and
+access it through a hashmap. Because all hashmap operations, like searching or
+adding new data, should be efficient, the keys that identify user data entries
+are allocated in lowest latency memory. On the other hand, because the size of
+the data entries could be huge, we allocate them in memory with the highest
+capacity.
+
+This example is intended for C programmers.
+
+This example consists of the following files:
+
+hashmap.c/h -- an example of hashmap that uses custom data allocator
+hmat.c      -- main file that uses Memkind and hashmap
+Makefile    -- rules for building this example
+run.sh      -- one way to run this example to illustrate what it does
+
+To build this example run: make
+To run it and see what it illustrates run: ./run.sh
+
+Modifying the code and run steps is a great way to learn from this example.
+
+There is no persistent storage in this example, so topics such as:
+ - flushing to persistent storage,
+ - transactions and failure atomicity
+ are not considered.
+
diff --git a/examples/I/hashmap.c b/examples/I/hashmap.c
@@ -0,0 +1,126 @@
+#include <string.h>
+
+#include "hashmap.h"
+
+/*
+ * Hashmap header. The bucket table is a flexible array member, so the header
+ * and all bucket slots are obtained with a single allocation.
+ */
+struct hashmap {
+    int size;                         /* number of slots in buckets[] */
+    struct hashmap_alloc alloc;       /* allocator used for the map and its nodes */
+    struct hashmap_keyval *buckets[]; /* first node of each bucket, or NULL */
+};
+
+/* One node of a bucket's singly linked list: a key/value pair plus a link. */
+struct hashmap_keyval {
+    char *key;                   /* key pointer as given to hashmap_set(); not copied */
+    void *val;                   /* value pointer as given to hashmap_set() */
+    struct hashmap_keyval *next; /* following node in the same bucket, or NULL */
+};
+
+/*
+ * Very simple hash function: XOR of the key length with every key byte.
+ *
+ * key     - bytes to hash; only the first key_len bytes are read
+ * key_len - number of bytes in key
+ *
+ * Returns a non-negative value, so "hash % size" is a valid bucket index for
+ * any positive size.
+ */
+static int hash_func(char* key, size_t key_len)
+{
+    /* Work in unsigned arithmetic so that bytes >= 0x80 (negative where char
+     * is signed) and very large lengths cannot produce a negative hash. */
+    unsigned int hash = (unsigned int)key_len;
+    size_t i;
+
+    for (i = 0; i < key_len; i++) {
+        hash ^= (unsigned char)key[i];
+    }
+
+    /* Clear the top bit so the conversion to int keeps the value >= 0. */
+    return (int)(hash & (~0u >> 1));
+}
+
+/*
+ * Create an empty hashmap with `size` buckets.
+ *
+ * size  - number of buckets; must be greater than zero
+ * alloc - allocator callbacks; used here for the map itself and stored in the
+ *         map for all later node allocations and frees
+ *
+ * Returns the new hashmap, or NULL if size is not positive, a callback is
+ * missing, or the allocation fails.
+ */
+struct hashmap* hashmap_new(int size, struct hashmap_alloc alloc)
+{
+    int i;
+    struct hashmap* hashmap;
+
+    /* A map without buckets would make "hash % size" divide by zero. */
+    if (size <= 0 || alloc.malloc == NULL || alloc.free == NULL) {
+        return NULL;
+    }
+
+    /* Use user provided allocator. buckets[] stores pointers to nodes, so each
+     * slot needs only the size of a pointer, not of a whole node. */
+    hashmap = alloc.malloc(sizeof(*hashmap) +
+        (sizeof(hashmap->buckets[0]) * (size_t)size));
+    if (hashmap == NULL) {
+        return NULL;
+    }
+
+    hashmap->size = size;
+    hashmap->alloc = alloc;
+    for (i = 0; i < size; i++) {
+        hashmap->buckets[i] = NULL;
+    }
+
+    return hashmap;
+}
+
+/*
+ * Free every node of the hashmap and then the hashmap itself, using the
+ * allocator stored in the map. Keys and values referenced by the nodes are
+ * not freed here. Passing NULL is a no-op.
+ */
+void hashmap_delete(struct hashmap* hashmap)
+{
+    int i;
+
+    if (hashmap == NULL) {
+        return;
+    }
+
+    for (i = 0; i < hashmap->size; i++) {
+        struct hashmap_keyval* iter = hashmap->buckets[i];
+
+        while (iter != NULL) {
+            /* Read the link before freeing the node; touching iter->next
+             * after free() would be a use-after-free. */
+            struct hashmap_keyval* next = iter->next;
+
+            hashmap->alloc.free(iter);
+            iter = next;
+        }
+    }
+
+    hashmap->alloc.free(hashmap);
+}
+
+/*
+ * Insert a key/value pair, or update the value if the key is already present.
+ *
+ * hashmap - map to modify
+ * key     - NUL-terminated key; stored by pointer (not copied) and compared
+ *           with strcmp()
+ * key_len - number of key bytes fed to the hash function
+ * val     - value pointer to associate with the key
+ *
+ * Returns the node holding the pair, or NULL if hashmap or key is NULL, the
+ * map has no buckets, or a new node cannot be allocated.
+ */
+struct hashmap_keyval* hashmap_set(struct hashmap* hashmap, char* key,
+    size_t key_len, void* val)
+{
+    if (hashmap == NULL || key == NULL || hashmap->size <= 0) {
+        return NULL;
+    }
+
+    int hash = hash_func(key, key_len) % hashmap->size;
+    if (hash < 0) {
+        /* Keep the index inside buckets[] even for a negative hash. */
+        hash += hashmap->size;
+    }
+
+    /* Walk the bucket through a pointer to the link, so an empty bucket and
+     * the end of a non-empty one are handled by the same code. */
+    struct hashmap_keyval** iter = &hashmap->buckets[hash];
+    while (*iter) {
+        /* Check if the given key exists */
+        if (strcmp((*iter)->key, key) == 0) {
+            /* If so, update the value */
+            (*iter)->val = val;
+            return *iter;
+        }
+        iter = &(*iter)->next;
+    }
+
+    /* Add new value to the end of the bucket */
+    struct hashmap_keyval* keyval = hashmap->alloc.malloc(sizeof(*keyval));
+    if (keyval == NULL) {
+        return NULL;
+    }
+    keyval->key = key;
+    keyval->val = val;
+    keyval->next = NULL;
+    *iter = keyval;
+
+    return keyval;
+}
+
+/*
+ * Look up the value stored under a key.
+ *
+ * hashmap - map to search
+ * key     - NUL-terminated key; compared with strcmp()
+ * key_len - number of key bytes fed to the hash function
+ *
+ * Returns the stored value pointer, or NULL if the key is not present, if
+ * hashmap or key is NULL, or if the map has no buckets.
+ */
+void* hashmap_get(struct hashmap* hashmap, char* key, size_t key_len)
+{
+    if (hashmap == NULL || key == NULL || hashmap->size <= 0) {
+        return NULL;
+    }
+
+    int hash = hash_func(key, key_len) % hashmap->size;
+    if (hash < 0) {
+        /* Keep the index inside buckets[] even for a negative hash. */
+        hash += hashmap->size;
+    }
+
+    struct hashmap_keyval* iter = hashmap->buckets[hash];
+    while (iter) {
+        /* Check if the given key exists */
+        if (strcmp(iter->key, key) == 0) {
+            return iter->val;
+        }
+        iter = iter->next;
+    }
+
+    return NULL;
+}
diff --git a/examples/I/hashmap.h b/examples/I/hashmap.h
@@ -0,0 +1,16 @@
+#ifndef HASHMAP_H
+#define HASHMAP_H
+
+#include <stddef.h>
+
+/* Opaque types; their layout is private to hashmap.c */
+struct hashmap;
+struct hashmap_keyval;
+
+/* Allocator callbacks used by the hashmap for the map itself and its nodes */
+struct hashmap_alloc {
+    void*(*malloc)(size_t size);
+    void(*free)(void* ptr);
+};
+
+/* Create an empty hashmap with `size` buckets, allocated with alloc.malloc.
+ * The allocator is stored in the map and used for all later node allocations
+ * and by hashmap_delete(). */
+struct hashmap* hashmap_new(int size, struct hashmap_alloc alloc);
+
+/* Free all nodes and the hashmap itself. Keys and values are not freed.
+ * Passing NULL is a no-op. */
+void hashmap_delete(struct hashmap* hashmap);
+
+/* Insert `val` under `key`, or replace the value if the key already exists.
+ * The key is stored by pointer (not copied) and compared with strcmp(), so it
+ * must be NUL-terminated; key_len is used only for hashing.
+ * Returns the node holding the pair, or NULL if hashmap is NULL. */
+struct hashmap_keyval* hashmap_set(struct hashmap* hashmap, char* key,
+    size_t key_len, void* val);
+
+/* Return the value stored under `key`, or NULL if the key is not present. */
+void* hashmap_get(struct hashmap* hashmap, char* key, size_t key_len);
+
+#endif /* HASHMAP_H */
diff --git a/examples/I/hmat.c b/examples/I/hmat.c
@@ -0,0 +1,102 @@
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <memkind.h>
+
+#include "hashmap.h"
+
+/*
+ * In this example, we'll use simple data base entries: a named key plus a
+ * pointer to the payload stored under that key.
+ */
+struct db_data {
+    char *name;      /* key string */
+    size_t name_len; /* length of name as reported by snprintf() */
+    void *data;      /* payload buffer */
+};
+
+/*
+ * Single-argument wrappers for Memkind's malloc and free, matching the
+ * callback signatures of struct hashmap_alloc.
+ *
+ * This one requests `size` bytes from the MEMKIND_LOWEST_LATENCY_LOCAL kind
+ * and returns whatever memkind_malloc() returns.
+ */
+void* lowest_latency_malloc(size_t size)
+{
+    void *mem = memkind_malloc(MEMKIND_LOWEST_LATENCY_LOCAL, size);
+
+    return mem;
+}
+
+/* Hand `ptr` back to Memkind, naming MEMKIND_LOWEST_LATENCY_LOCAL as its kind */
+void lowest_latency_free(void* ptr)
+{
+    memkind_free(
+        MEMKIND_LOWEST_LATENCY_LOCAL,
+        ptr);
+}
+
+/*
+ * Request `size` bytes from the MEMKIND_HIGHEST_CAPACITY kind and return
+ * whatever memkind_malloc() returns.
+ */
+void* highest_capacity_malloc(size_t size)
+{
+    void *mem = memkind_malloc(MEMKIND_HIGHEST_CAPACITY, size);
+
+    return mem;
+}
+
+/* Hand `ptr` back to Memkind, naming MEMKIND_HIGHEST_CAPACITY as its kind */
+void highest_capacity_free(void* ptr)
+{
+    memkind_free(
+        MEMKIND_HIGHEST_CAPACITY,
+        ptr);
+}
+
+/*
+ * Build a small data base whose payloads live in highest-capacity memory while
+ * the hashmap and the keys live in lowest-latency memory, then check that
+ * every entry can be found through the hashmap.
+ *
+ * Returns 0 on success and -1 if an allocation, key formatting, insertion or
+ * lookup fails. Everything that was allocated is released on both paths.
+ */
+int main(void)
+{
+    const int DB_SIZE = 100;
+    const int KEY_SIZE = 16;
+    const int DATA_SIZE = 10 * 1024;
+    const int BUCKETS_NUM = 100;
+    struct db_data* db = NULL;
+    int created = 0; /* number of db entries whose pointers have been set */
+    int ret = -1;
+    int i;
+
+    /* Hashmap structure used in this example allows user to define custom
+     * allocator for buckets and bucket entries. Because we want insert and
+     * get operations to be fast, we use functions that use Memkind's
+     * MEMKIND_LOWEST_LATENCY_LOCAL kind.
+     */
+    struct hashmap_alloc lowest_latency_allocator = {lowest_latency_malloc,
+        lowest_latency_free};
+
+    /* Create a hashmap - all malloc/free operations (including allocating
+     * hashmap itself) will use our lowest latency allocator.
+     */
+    struct hashmap* hm = hashmap_new(BUCKETS_NUM,
+        lowest_latency_allocator);
+    if (hm == NULL) {
+        fprintf(stderr, "ERROR: cannot create hashmap\n");
+        return ret;
+    }
+
+    /* Create a data base - here the speed of data access is less important than
+     * ability to store large num of entries, so we use Memkind's
+     * MEMKIND_HIGHEST_CAPACITY kind to store database itself and all entries'
+     * data. Still, we want all hashmap operations on keys to be done with
+     * highest speed, so we keep entries' keys on memory with lowest latency.
+     */
+    db = highest_capacity_malloc(sizeof(struct db_data) * DB_SIZE);
+    if (db == NULL) {
+        fprintf(stderr, "ERROR: cannot allocate data base\n");
+        goto out;
+    }
+
+    for (i = 0; i < DB_SIZE; i++) {
+        /* Store data in memory with highest capacity */
+        db[i].data = highest_capacity_malloc(DATA_SIZE);
+
+        /* Store keys in memory with lowest latency */
+        db[i].name = lowest_latency_malloc(KEY_SIZE);
+
+        /* Both pointers of entry i are now set (possibly to NULL), so the
+         * cleanup code below may safely look at them. */
+        created = i + 1;
+        if (db[i].data == NULL || db[i].name == NULL) {
+            fprintf(stderr, "ERROR: cannot allocate entry %d\n", i);
+            goto out;
+        }
+
+        /* snprintf() reports the untruncated length, so a result of KEY_SIZE
+         * or more means the key did not fit into the buffer. */
+        int name_len = snprintf(db[i].name, KEY_SIZE, "item %d", i);
+        if (name_len < 0 || name_len >= KEY_SIZE) {
+            fprintf(stderr, "ERROR: cannot format key %d\n", i);
+            goto out;
+        }
+        db[i].name_len = (size_t)name_len;
+
+        /* Add current entry to hashmap */
+        if (hashmap_set(hm, db[i].name, db[i].name_len, db[i].data) == NULL) {
+            fprintf(stderr, "ERROR: cannot insert entry %d\n", i);
+            goto out;
+        }
+    }
+
+    /* Iterate over all entries in database and check if all of them can be
+     * found. NOTE: the hashmap, its nodes and the keys all live in memory with
+     * lowest latency; only the db array itself, which supplies the key
+     * pointers here, is in highest capacity memory.
+     */
+    for (i = 0; i < DB_SIZE; i++) {
+        if (hashmap_get(hm, db[i].name, db[i].name_len) != db[i].data) {
+            fprintf(stderr, "ERROR: bad data at %d\n", i);
+            goto out;
+        }
+    }
+
+    ret = 0;
+
+out:
+    /* Cleanup. The hashmap only points at keys and data, so drop it first. */
+    hashmap_delete(hm);
+
+    /* Note that for freeing memory we could use generic Memkind free
+     * function and pass NULL as kind - this way Memkind will figure out correct
+     * kind by itself only by looking at the data pointer. A NULL data pointer
+     * (left behind by a failed allocation) is ignored by memkind_free(). */
+    if (db != NULL) {
+        for (i = 0; i < created; i++) {
+            memkind_free(NULL, db[i].data);
+            memkind_free(NULL, db[i].name);
+        }
+        highest_capacity_free(db);
+    }
+
+    return ret;
+}
diff --git a/examples/I/run.sh b/examples/I/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+#
+# shell commands to run this example
+#
+
+# Enable exit-on-error and command tracing here rather than on the shebang
+# line, so they also apply when the script is started as "bash run.sh".
+set -ex
+
+./hmat