-
Notifications
You must be signed in to change notification settings - Fork 320
examples/c: add hashing and naive substring search algo #331
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
anakryiko
wants to merge
1
commit into
libbpf:master
Choose a base branch
from
anakryiko:example-substr
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,4 @@ | |
/lsm | ||
/cmake-build-debug/ | ||
/cmake-build-release/ | ||
/substr |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause | ||
/* Copyright (c) 2020 Facebook */ | ||
#include <vmlinux.h> | ||
#include <bpf/bpf_helpers.h> | ||
#include <bpf/bpf_core_read.h> | ||
|
||
char LICENSE[] SEC("license") = "Dual BSD/GPL"; | ||
|
||
int my_pid = 0; | ||
|
||
#ifndef likely | ||
#define likely(x) __builtin_expect(!!(x), 1) | ||
#endif | ||
|
||
/* Size, in bytes, of the character buffer embedded in struct cstr. */
#define MAX_STR_LEN 128

/* Fixed-capacity character buffer used as the string type by the search routines. */
struct cstr {
	char data[MAX_STR_LEN];
};
|
||
/*
 * Clamp `pos` to a valid index into cstr::data.
 *
 * Returns `pos` unchanged when it is below MAX_STR_LEN, and 0 otherwise.
 * Callers are not expected to pass an out-of-range value; the clamp exists so
 * that the BPF verifier sees a bounded index.
 */
static __always_inline u64 cstr_pos(u64 pos)
{
	/* keep the compiler from reordering the range check below with the array access in cstr_char() */
	barrier_var(pos);
	if (!likely(pos < MAX_STR_LEN))
		pos = 0;
	barrier_var(pos);

	return pos;
}
|
||
/* Read the byte of `s` at index `pos`; the index is first passed through cstr_pos(). */
static __always_inline char cstr_char(const struct cstr *s, u64 pos)
{
	u64 idx = cstr_pos(pos);

	return s->data[idx];
}
|
||
unsigned zero = 0, one = 1; /* obfuscate integers for verifier */ | ||
|
||
/*
 * Check whether `needle` occurs in `haystack` at offset `pos`.
 *
 * Bytes are compared one by one until the needle's '\0' terminator is reached
 * or MAX_STR_LEN needle bytes have been compared. Returns true when every
 * compared byte matched; returns false on the first differing byte, or when
 * the comparison would have to read past the end of the haystack buffer.
 */
static bool __substr_match(const struct cstr *haystack __arg_nonnull,
			   const struct cstr *needle __arg_nonnull,
			   int pos)
{
	u64 i, hay_idx;
	char c;

	bpf_for(i, 0, MAX_STR_LEN) {
		c = cstr_char(needle, i);
		if (c == '\0')
			return true;

		/*
		 * cstr_char() maps an out-of-range index to 0. Without this
		 * check, a needle reaching past the end of the buffer would be
		 * compared against the beginning of the haystack and could
		 * produce a false match.
		 */
		hay_idx = pos + i;
		if (hay_idx >= MAX_STR_LEN)
			return false;

		if (c != cstr_char(haystack, hay_idx))
			return false;
	}

	return true;
}
|
||
/* | ||
* Find substring `needle` in a string `haystack`, starting from position | ||
* `start` (zero-indexed). Returns substring start position (>= `start`) if | ||
* match is found; negative result, otherwise. | ||
*/ | ||
__noinline int substr_hashed(const struct cstr *haystack __arg_nonnull, | ||
const struct cstr *needle __arg_nonnull, | ||
int start) | ||
{ | ||
u32 i, need_hash = zero, hay_hash = zero, mul = one; | ||
int need_len = zero, hay_len = zero, p; | ||
|
||
bpf_for(i, 0, MAX_STR_LEN) { | ||
if (needle->data[i] == '\0') | ||
break; | ||
|
||
need_len += 1; | ||
need_hash = need_hash * 31 + (u32)needle->data[i]; | ||
mul *= 31; | ||
} | ||
|
||
if (need_len == 0) /* emtpy substring always matches */ | ||
return start; | ||
|
||
bpf_for(i, start, MAX_STR_LEN) { | ||
if (haystack->data[i] == '\0') | ||
return -1; | ||
|
||
hay_hash = hay_hash * 31 + (u32)haystack->data[i]; | ||
hay_len += 1; | ||
if (hay_len < need_len) { | ||
continue; | ||
} else if (hay_len > need_len) { | ||
hay_len -= 1; | ||
hay_hash -= mul * cstr_char(haystack, i - hay_len); | ||
} | ||
|
||
/* now hay_len == need_len */ | ||
p = i - (hay_len - 1); | ||
if (hay_hash == need_hash && __substr_match(haystack, needle, p)) | ||
return p; | ||
} | ||
|
||
return -1; | ||
} | ||
|
||
/*
 * Straightforward substring search: try __substr_match() at each position
 * from `start` up to MAX_STR_LEN, stopping at the haystack's '\0' byte.
 *
 * Returns the first position at which `needle` matches, or -1 if none does.
 */
__noinline int substr_naive(const struct cstr *haystack __arg_nonnull,
			    const struct cstr *needle __arg_nonnull,
			    int start)
{
	int *cur;

	bpf_for_each(num, cur, start, MAX_STR_LEN) {
		int pos = *cur;

		/* reached the end of the haystack text */
		if (cstr_char(haystack, pos) == '\0')
			break;

		if (__substr_match(haystack, needle, pos))
			return pos;
	}

	return -1;
}
|
||
#define BENCH 0 | ||
#define BENCH_ITERS 25000 | ||
|
||
#if BENCH | ||
static struct cstr haystack = { "abacabadabacabaeabacabadabacaba" }; | ||
static struct cstr needle = { "eaba" }; | ||
#else | ||
static struct cstr haystack = { "abracadabra" }; | ||
static struct cstr needle = { "a" }; | ||
#endif | ||
|
||
/*
 * raw_tp/sys_enter handler exercising substr_hashed().
 *
 * Events are ignored unless the upper 32 bits of bpf_get_current_pid_tgid()
 * equal `my_pid`. Otherwise the handler repeatedly searches `haystack` for
 * `needle`, resuming one position past the previous match, until no further
 * match is found. With BENCH disabled every match position is printed; with
 * BENCH enabled the search is repeated BENCH_ITERS times and the average
 * duration per iteration is printed instead. Always returns 0.
 */
SEC("raw_tp/sys_enter")
int test_substr_hashed(void *ctx)
{
	int pid = bpf_get_current_pid_tgid() >> 32;
	int p;
#if BENCH
	u64 start, end;
#endif

	if (pid != my_pid)
		return 0;

#if BENCH
	start = bpf_ktime_get_ns();
	bpf_repeat(BENCH_ITERS) {
#endif
	p = -1;
	bpf_repeat(MAX_STR_LEN) {
		/* continue searching right after the previous match */
		p = substr_hashed(&haystack, &needle, p + 1);
		if (p < 0)
			break;
#if !BENCH
		bpf_printk("HASHED match at pos #%d!", p);
#endif
	}

#if BENCH
	}
	end = bpf_ktime_get_ns();
	bpf_printk("BENCH HASHED %lu ns/iter", (end - start) / BENCH_ITERS);
#endif
	return 0;
}
|
||
/*
 * raw_tp/sys_enter handler exercising substr_naive().
 *
 * Events are ignored unless the upper 32 bits of bpf_get_current_pid_tgid()
 * equal `my_pid`. Otherwise the handler repeatedly searches `haystack` for
 * `needle`, resuming one position past the previous match, until no further
 * match is found. With BENCH disabled every match position is printed; with
 * BENCH enabled the search is repeated BENCH_ITERS times and the average
 * duration per iteration is printed instead. Always returns 0.
 */
SEC("raw_tp/sys_enter")
int test_substr_naive(void *ctx)
{
	int pid = bpf_get_current_pid_tgid() >> 32;
	int p;
#if BENCH
	/* timestamps are only needed in benchmark mode */
	u64 start, end;
#endif

	if (pid != my_pid)
		return 0;

#if BENCH
	start = bpf_ktime_get_ns();
	bpf_repeat(BENCH_ITERS) {
#endif
	p = -1;
	bpf_repeat(MAX_STR_LEN) {
		/* continue searching right after the previous match */
		p = substr_naive(&haystack, &needle, p + 1);
		if (p < 0)
			break;
#if !BENCH
		bpf_printk("NAIVE match at pos #%d!", p);
#endif
	}

#if BENCH
	}
	end = bpf_ktime_get_ns();
	bpf_printk("BENCH NAIVE %lu ns/iter", (end - start) / BENCH_ITERS);
#endif

	return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) | ||
/* Copyright (c) 2020 Facebook */ | ||
#include <stdio.h> | ||
#include <unistd.h> | ||
#include <sys/resource.h> | ||
#include <bpf/libbpf.h> | ||
#include "substr.skel.h" | ||
|
||
/* libbpf print callback: writes every message to stderr, whatever its level. */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	int written = vfprintf(stderr, format, args);

	return written;
}
|
||
int main(int argc, char **argv) | ||
{ | ||
struct substr_bpf *skel; | ||
int err; | ||
|
||
/* Set up libbpf errors and debug info callback */ | ||
libbpf_set_print(libbpf_print_fn); | ||
|
||
/* Open BPF application */ | ||
skel = substr_bpf__open(); | ||
if (!skel) { | ||
fprintf(stderr, "Failed to open BPF skeleton\n"); | ||
return 1; | ||
} | ||
|
||
/* ensure BPF program only handles write() syscalls from our process */ | ||
skel->bss->my_pid = getpid(); | ||
|
||
/* Load & verify BPF programs */ | ||
err = substr_bpf__load(skel); | ||
if (err) { | ||
fprintf(stderr, "Failed to load and verify BPF skeleton\n"); | ||
goto cleanup; | ||
} | ||
|
||
/* Attach tracepoint handler */ | ||
err = substr_bpf__attach(skel); | ||
if (err) { | ||
fprintf(stderr, "Failed to attach BPF skeleton\n"); | ||
goto cleanup; | ||
} | ||
|
||
printf("Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` " | ||
"to see output of the BPF programs.\n"); | ||
|
||
for (;;) { | ||
/* trigger our BPF program */ | ||
fprintf(stderr, "."); | ||
sleep(1); | ||
} | ||
|
||
cleanup: | ||
substr_bpf__destroy(skel); | ||
return -err; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, Andrii. I see some annotations in the prog like
__arg_nonnull
. Does this help the compiler or the verifier optimize their processing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
__arg_nonnull
is an annotation that can be applied to the arguments of a global subprog (which is verified by the BPF verifier in isolation from the main program, based on the function's type signature; so it's a more restricted way to verify, but it also allows BPF verification to scale much better, as we create smaller isolated pieces of logic that the BPF verifier won't have to re-validate every single time). It tells the BPF verifier that this argument can't be NULL. This will be assumed by the verifier when validating the body of that subprogram, but also enforced by the verifier when other code calls into this subprogram.
Hope this helps.