Skip to content

examples/c: add hashing and naive substring search algo #331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/c/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
/lsm
/cmake-build-debug/
/cmake-build-release/
/substr
2 changes: 1 addition & 1 deletion examples/c/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)

APPS = minimal minimal_legacy minimal_ns bootstrap uprobe kprobe fentry \
usdt sockfilter tc ksyscall task_iter lsm
usdt sockfilter tc ksyscall task_iter lsm substr

CARGO ?= $(shell which cargo)
ifeq ($(strip $(CARGO)),)
Expand Down
192 changes: 192 additions & 0 deletions examples/c/substr.bpf.c
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, Andrii. I see some annotations in the prog, like __arg_nonnull. Do they help the compiler or the verifier optimize their processing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__arg_nonnull is an annotation that can be applied to arguments of a global subprog (which is verified by the BPF verifier in isolation from the main program, based on the function's type signature; so it's a more restricted way to verify, but it also allows BPF verification to scale much better, as we create smaller isolated pieces of logic that the BPF verifier won't have to re-validate every single time). It tells the BPF verifier that this argument can't be NULL. This will be assumed by the verifier when validating the body of that subprogram, but also enforced by the verifier when other code calls into this subprogram.

Hope this helps.

Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

/* License string, placed in the "license" ELF section via SEC(). */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

/*
 * PID filter: the test programs in this file return immediately unless the
 * current task's ID (upper 32 bits of bpf_get_current_pid_tgid()) equals this
 * value. The user-space loader (substr.c) stores its own getpid() here before
 * loading the programs.
 */
int my_pid = 0;

/*
 * Branch hint: tells the compiler that `x` is expected to be true.
 * Only defined here if no earlier header already provides it.
 */
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif

/* Capacity, in bytes, of the inline buffer carried by struct cstr. */
#define MAX_STR_LEN 128

/*
 * Fixed-capacity string: the characters are stored inline in `data`.
 * The search helpers in this file read it up to the first '\0' or up to
 * MAX_STR_LEN bytes, whichever comes first.
 */
struct cstr {
	char data[MAX_STR_LEN];
};

/*
 * Clamp `pos` to a valid index into cstr::data.
 *
 * Returns `pos` unchanged when it is below MAX_STR_LEN and 0 otherwise, so
 * the value handed back is always within [0, MAX_STR_LEN).
 */
static __always_inline u64 cstr_pos(u64 pos)
{
/* prevent compiler reordering comparison below with array access in cstr_char() */
barrier_var(pos);
/*
 * `pos >= MAX_STR_LEN` never happens, but we need to make verifier happy
 *
 * NOTE(review): if a caller ever does pass an out-of-range index, the result
 * is index 0, i.e. data[0] is read instead of the requested position.
 */
pos = likely(pos < MAX_STR_LEN) ? pos : 0;
/*
 * NOTE(review): presumably this second barrier keeps the clamped value from
 * being folded back into the caller's own index arithmetic -- confirm.
 */
barrier_var(pos);
return pos;
}

/*
 * Read the byte of `s` at index `pos`.
 *
 * The index is passed through cstr_pos() first, so any pos >= MAX_STR_LEN
 * reads s->data[0] rather than memory past the buffer.
 */
static __always_inline char cstr_char(const struct cstr *s, u64 pos)
{
	u64 idx = cstr_pos(pos);

	return s->data[idx];
}

/*
 * Run-time copies of 0 and 1. substr_hashed() initializes its counters and
 * hash state from these globals instead of from integer literals.
 * NOTE(review): presumably the point is that the verifier then sees unknown
 * scalars rather than known constants -- confirm.
 */
unsigned zero = 0, one = 1; /* obfuscate integers for verifier */

/*
 * Check whether `needle` occurs in `haystack` at offset `pos`.
 *
 * Bytes are compared one by one until the needle's '\0' terminator is
 * reached (or, for an unterminated needle, until MAX_STR_LEN bytes have
 * been compared).
 *
 * Returns true when every needle byte equals the corresponding haystack
 * byte; an empty needle therefore matches at any offset. Returns false on
 * the first mismatch.
 *
 * A comparison position outside [0, MAX_STR_LEN) counts as a mismatch.
 * Without that check cstr_char() would clamp the index to 0 and compare
 * against haystack->data[0], which lets a needle "wrap around" the end of a
 * haystack that has no '\0' inside the buffer and produce a false match.
 */
static bool __substr_match(const struct cstr *haystack __arg_nonnull,
			   const struct cstr *needle __arg_nonnull,
			   int pos)
{
	u64 i, hay_pos;
	char c;

	bpf_for(i, 0, MAX_STR_LEN) {
		c = cstr_char(needle, i);
		if (c == '\0')
			return true;

		/*
		 * `pos` is converted to u64 here, so a negative offset becomes
		 * a huge value and is rejected by the same range check.
		 */
		hay_pos = pos + i;
		if (hay_pos >= MAX_STR_LEN)
			return false;

		if (c != cstr_char(haystack, hay_pos))
			return false;
	}

	return true;
}

/*
 * Find substring `needle` in a string `haystack`, starting from position
 * `start` (zero-indexed). Returns substring start position (>= `start`) if
 * match is found; negative result, otherwise.
 *
 * Uses a rolling polynomial hash (multiplier 31, u32 wrap-around arithmetic):
 * the needle is hashed once, then a window of the same length slides over
 * the haystack. Only windows whose hash equals the needle's hash are
 * compared byte by byte with __substr_match().
 */
__noinline int substr_hashed(const struct cstr *haystack __arg_nonnull,
const struct cstr *needle __arg_nonnull,
int start)
{
/* counters and hash state start from the `zero`/`one` globals, not literals */
u32 i, need_hash = zero, hay_hash = zero, mul = one;
int need_len = zero, hay_len = zero, p;

/* pass 1: needle length, needle hash, and mul = 31^need_len */
bpf_for(i, 0, MAX_STR_LEN) {
if (needle->data[i] == '\0')
break;

need_len += 1;
need_hash = need_hash * 31 + (u32)needle->data[i];
mul *= 31;
}

/*
 * NOTE(review): `start` is returned as-is on this path, without being
 * checked against the haystack contents or MAX_STR_LEN.
 */
if (need_len == 0) /* empty substring always matches */
return start;

/* pass 2: slide a need_len-wide window over the haystack */
bpf_for(i, start, MAX_STR_LEN) {
/* haystack ended before any window matched */
if (haystack->data[i] == '\0')
return -1;

/* append the new byte to the window hash */
hay_hash = hay_hash * 31 + (u32)haystack->data[i];
hay_len += 1;
if (hay_len < need_len) {
/* window not full yet, nothing to compare */
continue;
} else if (hay_len > need_len) {
/*
 * window grew one byte too wide: drop its oldest byte, whose weight
 * in the hash is 31^need_len, i.e. `mul`
 */
hay_len -= 1;
hay_hash -= mul * cstr_char(haystack, i - hay_len);
}

/* now hay_len == need_len */
/* p is the index of the first byte of the current window */
p = i - (hay_len - 1);
/* equal hashes can still be a collision, so confirm byte by byte */
if (hay_hash == need_hash && __substr_match(haystack, needle, p))
return p;
}

return -1;
}

/*
 * Brute-force substring search.
 *
 * Tries every haystack offset from `start` up to MAX_STR_LEN - 1, in order,
 * and returns the first offset at which __substr_match() reports a match.
 * The scan stops as soon as the haystack byte at the current offset is '\0'.
 * Returns -1 when no offset matches.
 */
__noinline int substr_naive(const struct cstr *haystack __arg_nonnull,
			    const struct cstr *needle __arg_nonnull,
			    int start)
{
	int *cur;

	bpf_for_each(num, cur, start, MAX_STR_LEN) {
		int off = *cur;

		/* reached the end of the haystack text: nothing left to try */
		if (cstr_char(haystack, off) == '\0')
			return -1;

		if (!__substr_match(haystack, needle, off))
			continue;

		return off;
	}

	return -1;
}

/*
 * BENCH selects what the test programs below do:
 *   0 - print the position of every match found;
 *   1 - run the whole search BENCH_ITERS times and print the average time
 *       per iteration in nanoseconds, without per-match output.
 */
#define BENCH 0
#define BENCH_ITERS 25000

/* Input strings for the test programs; benchmark mode uses a different pair. */
#if BENCH
static struct cstr haystack = { "abacabadabacabaeabacabadabacaba" };
static struct cstr needle = { "eaba" };
#else
static struct cstr haystack = { "abracadabra" };
static struct cstr needle = { "a" };
#endif

/*
 * raw_tp/sys_enter handler that exercises substr_hashed().
 *
 * Does nothing unless the current task's ID (upper 32 bits of
 * bpf_get_current_pid_tgid()) equals my_pid. Otherwise it repeatedly searches
 * `haystack` for `needle`, restarting one position past the previous match,
 * until the search reports a negative result or MAX_STR_LEN searches have
 * been made.
 *
 * With BENCH == 0 each match position is printed with bpf_printk().
 * With BENCH != 0 the whole search is repeated BENCH_ITERS times and only
 * the average duration per iteration is printed.
 *
 * Always returns 0.
 */
SEC("raw_tp/sys_enter")
int test_substr_hashed(void *ctx)
{
	int pid = bpf_get_current_pid_tgid() >> 32;
	int p;
#if BENCH
	u64 start, end;
#endif

	if (pid != my_pid)
		return 0;

#if BENCH
	start = bpf_ktime_get_ns();
	bpf_repeat(BENCH_ITERS) {
#endif
	/* p + 1 is the next search start, so -1 begins the scan at offset 0 */
	p = -1;
	bpf_repeat(MAX_STR_LEN) {
		p = substr_hashed(&haystack, &needle, p + 1);
		if (p < 0)
			break;
#if !BENCH
		bpf_printk("HASHED match at pos #%d!", p);
#endif
	}

#if BENCH
	}
	end = bpf_ktime_get_ns();
	bpf_printk("BENCH HASHED %lu ns/iter", (end - start) / BENCH_ITERS);
#endif
	return 0;
}

/*
 * raw_tp/sys_enter handler that exercises substr_naive().
 *
 * Does nothing unless the current task's ID (upper 32 bits of
 * bpf_get_current_pid_tgid()) equals my_pid. Otherwise it repeatedly searches
 * `haystack` for `needle`, restarting one position past the previous match,
 * until the search reports a negative result or MAX_STR_LEN searches have
 * been made.
 *
 * With BENCH == 0 each match position is printed with bpf_printk().
 * With BENCH != 0 the whole search is repeated BENCH_ITERS times and only
 * the average duration per iteration is printed.
 *
 * Always returns 0.
 */
SEC("raw_tp/sys_enter")
int test_substr_naive(void *ctx)
{
	int pid = bpf_get_current_pid_tgid() >> 32;
	int p;
#if BENCH
	/* timestamps are only needed (and only used) in benchmark mode */
	u64 start, end;
#endif

	if (pid != my_pid)
		return 0;

#if BENCH
	start = bpf_ktime_get_ns();
	bpf_repeat(BENCH_ITERS) {
#endif
	/* p + 1 is the next search start, so -1 begins the scan at offset 0 */
	p = -1;
	bpf_repeat(MAX_STR_LEN) {
		p = substr_naive(&haystack, &needle, p + 1);
		if (p < 0)
			break;
#if !BENCH
		bpf_printk("NAIVE match at pos #%d!", p);
#endif
	}

#if BENCH
	}
	end = bpf_ktime_get_ns();
	bpf_printk("BENCH NAIVE %lu ns/iter", (end - start) / BENCH_ITERS);
#endif

	return 0;
}
58 changes: 58 additions & 0 deletions examples/c/substr.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2020 Facebook */
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include "substr.skel.h"

/*
 * libbpf print callback: writes every message to stderr, whatever its
 * `level`, and returns the value vfprintf() returned.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	int written;

	(void)level; /* no filtering by severity */
	written = vfprintf(stderr, format, args);
	return written;
}

/*
 * Open, load and attach the substr BPF skeleton, then loop forever, printing
 * a dot and sleeping one second per round so that this process keeps
 * entering syscalls.
 *
 * Returns 1 if the skeleton cannot be opened. If loading or attaching fails,
 * the skeleton is destroyed and the negated error code is returned.
 */
int main(int argc, char **argv)
{
	struct substr_bpf *skel;
	int err;

	/* Route libbpf's error and debug messages through our callback */
	libbpf_set_print(libbpf_print_fn);

	/* Open the BPF application */
	skel = substr_bpf__open();
	if (skel == NULL) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/* the BPF programs ignore every process whose ID differs from my_pid */
	skel->bss->my_pid = getpid();

	/* Load and verify the BPF programs */
	err = substr_bpf__load(skel);
	if (err != 0) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		goto out;
	}

	/* Attach the handlers */
	err = substr_bpf__attach(skel);
	if (err != 0) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto out;
	}

	printf("Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` "
	       "to see output of the BPF programs.\n");

	while (1) {
		/* trigger our BPF programs */
		fprintf(stderr, ".");
		sleep(1);
	}

out:
	substr_bpf__destroy(skel);
	return -err;
}
Loading