Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,26 @@ unit_tests/pcaps/*.pcap
*.mk
*.tgz
src/libmerc/asn1/oid.cc
src/libmerc/asn1/oid.h
src/libmerc/asn1/oid.h

# Build artifacts
src/cert_analyze
src/intercept_server
src/libmerc/.depend
src/libmerc/__.SYMDEF*
src/libmerc/lctrie/.d/
src/libmerc/lctrie/liblctrie.a
src/libmerc_test
src/libmerc_util
src/unit_test
src/libmerc/*.o
src/libmerc/asn1/*.o
src/libmerc/libmerc.a
src/libmerc/libmerc.so
unit_tests/Makefile
unit_tests/*.o
unit_tests/mercury_test

# macOS specific
.DS_Store
*.dSYM/
108 changes: 108 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Mercury is an open-source network metadata capture and analysis package for network security monitoring, fingerprinting, and protocol analysis. It consists of:
- **mercury**: High-performance Linux packet capture application using AF_PACKET
- **libmerc**: Core packet processing library (C++17)
- **pmercury**: Python interface via Cython bindings
- **Utilities**: cert_analyze, tls_scanner, batch_gcd for specialized analysis

## Essential Commands

### Build Commands
```bash
# Standard build
./configure && make

# Debug build
make debug-mercury

# Build libraries only
make libs

# Install with systemd service
sudo make install MERCURY_CFG=mercury.cfg

# Non-root installation
sudo make install-nonroot

# Build Cython wheel
cd src/cython && make wheel
```

### Testing Commands
```bash
# Run all tests
make test

# Run unit tests
make unit_tests
cd unit_tests && make

# Generate coverage report
make coverage_report

# Run specific test
cd unit_tests && ./mercury_test "[test_name]"
```

### Development Commands
```bash
# Format code
make format

# Generate documentation
make doc # Doxygen
make sphinx # Sphinx docs

# Version management
make increment-patchlevel # Bump patch version
make increment-minor-version # Bump minor version
```

## Architecture Overview

### Core Components

**libmerc Library** (`src/libmerc/`)
- Main entry point: `libmerc.cc`, `pkt_proc.cc` - packet processing engine
- Protocol parsers in individual files: `tls.cc`, `http.cc`, `dns.cc`, `ssh.cc`, etc.
- Analysis engine: `analysis.cc` - process identification and malware detection
- Memory-safe parsing: `datum.h` - zero-copy packet parsing without heap allocation

**Protocol Fingerprinting**
- TLS fingerprint generation using JA3/JA3S algorithms
- HTTP header fingerprinting
- DNS query pattern analysis
- All fingerprinting code outputs JSON for easy integration

**Performance Critical Paths**
- Packet processing uses lockless ring buffers and memory pools
- JSON output bypasses std::ostream for speed (`json_object.h`)
- Protocol parsing uses fixed-size stack buffers to avoid heap allocation

### Python Integration

The Python interface (`python/pmercury/`) uses Cython bindings (`src/cython/`) to expose:
- `mercury_python` module for packet analysis
- `perform_analysis()` and `perform_analysis_with_weights()` functions
- Direct access to fingerprinting and analysis capabilities

### Key Design Patterns

1. **Safe Parsing**: All protocol parsers use the `datum` class for bounds-checked parsing
2. **JSON Output**: Custom JSON generator optimized for network metadata
3. **Resource Files**: Encrypted archives containing protocol databases and ML models
4. **Platform Abstraction**: Conditional compilation for Linux (AF_PACKET) vs macOS (PCAP only)

## Important Notes

- Primary development platform is Linux; macOS support is limited to PCAP file processing
- C++17 required for compilation
- Performance testing should use AF_PACKET on Linux for accurate results
- When modifying protocol parsers, ensure datum bounds checking is maintained
- New features should include unit tests in `unit_tests/`
- JSON output must follow guidelines in `doc/guidelines.md`
68 changes: 52 additions & 16 deletions src/af_packet_v3.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* Needed for gettid() definition from unistd.h */
Expand Down Expand Up @@ -505,7 +506,7 @@ int create_dedicated_socket(struct thread_storage *thread_stor, int fanout_arg)
* The start of each block is a struct tpacket_block_desc, so build an
* array of pointers, one to the start of each block struct
*/
struct tpacket_block_desc **block_header = (struct tpacket_block_desc**)malloc(thread_stor->ring_params.tp_block_nr * sizeof(struct tpacket_hdr_v1 *));
struct tpacket_block_desc **block_header = (struct tpacket_block_desc**)malloc(thread_stor->ring_params.tp_block_nr * sizeof(struct tpacket_block_desc *));
if (block_header == NULL) {
fprintf(stderr, "error: could not allocate block_header pointer array for thread %d\n", thread_stor->tnum);
munmap(mapped_buffer, map_buf_len);
Expand Down Expand Up @@ -841,6 +842,17 @@ void *packet_capture_thread_func(void *arg) {
return NULL;
}

/*
 * Helper function to clean up allocated resources.
 *
 * Frees the thread_storage array passed in and the file-level
 * global_thread_stall array, then resets global_thread_stall to NULL so
 * that a repeated call cannot free it twice.
 *
 * tstor:       array to free; may be NULL.  The caller's own copy of the
 *              pointer is not cleared, so it must not be used afterwards.
 * num_threads: number of entries in tstor.  Currently unused: only the
 *              two arrays themselves are released here, nothing that the
 *              individual entries refer to.
 */
static void cleanup_resources(struct thread_storage *tstor, int num_threads) {
    (void)num_threads; /* kept for the existing call sites; avoids -Wunused-parameter */

    /* free(NULL) is a no-op, so no NULL guards are required */
    free(tstor);
    free(global_thread_stall);
    global_thread_stall = NULL;
}

enum status bind_and_dispatch(struct mercury_config *cfg,
mercury_context mc,
struct output_file *out_ctx,
Expand All @@ -849,12 +861,12 @@ enum status bind_and_dispatch(struct mercury_config *cfg,
/* sanity check memory fractions */
if (cfg->buffer_fraction < 0.0 || cfg->buffer_fraction > 1.0 ) {
fprintf(stdout, "error: refusing to allocate buffer fraction %.3f\n", cfg->buffer_fraction);
exit(255);
return status_err;
}

if (cfg->io_balance_frac < 0.0 || cfg->io_balance_frac > 1.0 ) {
fprintf(stdout, "error: refusing to balance io buffers with %.3f\n", cfg->io_balance_frac);
exit(255);
return status_err;
}

/* initialize the ring limits from the configuration */
Expand Down Expand Up @@ -885,15 +897,29 @@ enum status bind_and_dispatch(struct mercury_config *cfg,
statst.verbosity = cfg->verbosity;

struct thread_storage *tstor; // Holds the array of struct thread_storage, one for each thread
tstor = (struct thread_storage *)malloc(num_threads * sizeof(struct thread_storage));
size_t tstor_size = num_threads * sizeof(struct thread_storage);
if (num_threads > 0 && tstor_size / num_threads != sizeof(struct thread_storage)) {
fprintf(stderr, "error: integer overflow in thread storage allocation\n");
return -1;
}
tstor = (struct thread_storage *)malloc(tstor_size);
if (!tstor) {
perror("could not allocate memory for strocut thread_storage array\n");
perror("could not allocate memory for struct thread_storage array\n");
return -1;
}
statst.tstor = tstor; // The stats thread needs to know how to access the socket for each packet worker

global_thread_stall = (struct thread_stall *)malloc((num_threads + 1) * sizeof(struct thread_stall));
size_t stall_size = (num_threads + 1) * sizeof(struct thread_stall);
if (num_threads > SIZE_MAX - 1 || stall_size / (num_threads + 1) != sizeof(struct thread_stall)) {
fprintf(stderr, "error: integer overflow in thread stall allocation\n");
free(tstor);
return -1;
}
global_thread_stall = (struct thread_stall *)malloc(stall_size);
if (!global_thread_stall) {
perror("could not allocate memory for global thread stall structs\n");
free(tstor);
return -1;
}
for (int i = 0; i <= num_threads; i++) {
global_thread_stall[i].used = 0;
Expand Down Expand Up @@ -921,13 +947,15 @@ enum status bind_and_dispatch(struct mercury_config *cfg,
uint32_t thread_ring_blockcount = thread_ring_size / thread_ring_blocksize;
if (thread_ring_blockcount < rl.af_min_blocks) {
fprintf(stderr, "Error: only able to allocate %u blocks per thread (minimum %u)\n", thread_ring_blockcount, rl.af_min_blocks);
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

/* blocks must be a multiple of the framesize */
if (thread_ring_blocksize % rl.af_framesize != 0) {
fprintf(stderr, "Error: computed thread blocksize (%u) is not a multiple of the framesize (%u)\n", thread_ring_blocksize, rl.af_framesize);
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

if ((uint64_t)num_threads * (uint64_t)thread_ring_blockcount * (uint64_t)thread_ring_blocksize < rl.af_desired_memory) {
Expand Down Expand Up @@ -963,14 +991,16 @@ enum status bind_and_dispatch(struct mercury_config *cfg,
err = pthread_attr_init(&(tstor[thread].thread_attributes));
if (err) {
fprintf(stderr, "%s: error initializing attributes for thread %d\n", strerror(err), thread);
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

pthread_mutexattr_t m_attr;
err = pthread_mutexattr_init(&m_attr);
if (err) {
fprintf(stderr, "%s: error initializing block streak mutex attributes for thread %d\n", strerror(err), thread);
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

memcpy(&(tstor[thread].ring_params), &thread_ring_req, sizeof(thread_ring_req));
Expand All @@ -979,7 +1009,8 @@ enum status bind_and_dispatch(struct mercury_config *cfg,

if (err != 0) {
fprintf(stderr, "error creating dedicated socket for thread %d\n", thread);
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}
}

Expand Down Expand Up @@ -1009,21 +1040,24 @@ enum status bind_and_dispatch(struct mercury_config *cfg,
err = pthread_create(&(statst.tid), NULL, stats_thread_func, &statst);
if (err != 0) {
perror("error creating stats thread");
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

for (int thread = 0; thread < num_threads; thread++) {
pthread_attr_t thread_attributes;
err = pthread_attr_init(&thread_attributes);
if (err) {
fprintf(stderr, "%s: error initializing attributes for thread %d\n", strerror(err), thread);
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

err = pthread_create(&(tstor[thread].tid), &thread_attributes, packet_capture_thread_func, &(tstor[thread]));
if (err) {
fprintf(stderr, "%s: error creating af_packet capture thread %d\n", strerror(err), thread);
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}
}

Expand All @@ -1032,7 +1066,8 @@ enum status bind_and_dispatch(struct mercury_config *cfg,
err = pthread_cond_broadcast(&(out_ctx->t_output_c)); /* Wake up output */
if (err != 0) {
printf("%s: error broadcasting all clear on output start condition\n", strerror(err));
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

/* At this point all threads are started but they're waiting on
Expand All @@ -1042,7 +1077,8 @@ enum status bind_and_dispatch(struct mercury_config *cfg,
err = pthread_cond_broadcast(&t_start_c); // Wake up all the waiting threads
if (err != 0) {
printf("%s: error broadcasting all clear on clean start condition\n", strerror(err));
exit(255);
cleanup_resources(tstor, num_threads);
return status_err;
}

/* Wait for the stats thread to close (which only happens on a sigint/sigterm) */
Expand Down
7 changes: 6 additions & 1 deletion src/batch_gcd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,12 @@ class pem_mpz_reader : public mpz_reader {
++linenum;
if (*next_line_to_write == linenum) {
std::string filename = base_filename + "-line-" + std::to_string(linenum) + ".cert.pem";
write_pem(fopen(filename.c_str(), "w+"), pemdata.data, pemdata.length(), "CERTIFICATE");
FILE *fp = fopen(filename.c_str(), "w+");
if (fp != NULL) {
write_pem(fp, pemdata.data, pemdata.length(), "CERTIFICATE");
} else {
fprintf(stderr, "error: could not open file %s for writing\n", filename.c_str());
}
++next_line_to_write;
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/capture.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
*/
enum status bind_and_dispatch(struct mercury_config *,
mercury_context,
struct output_file *) {
struct output_file *,
struct cap_stats *) {

fprintf(stderr, "error: packet capture is unavailable; AF_PACKET TPACKETv3 not present\n");

Expand Down
3 changes: 2 additions & 1 deletion src/intercept.cc
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,8 @@ struct daemon_output : public output {
perror("opening datagram socket");
}
name.sun_family = AF_UNIX;
strcpy(name.sun_path, socket_name);
strncpy(name.sun_path, socket_name, sizeof(name.sun_path) - 1);
name.sun_path[sizeof(name.sun_path) - 1] = '\0';

}
void write_buffer(struct buffer_stream &buf) {
Expand Down
3 changes: 2 additions & 1 deletion src/intercept_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ int main(int argc, char *argv[]) {
//
struct sockaddr_un name;
name.sun_family = AF_UNIX;
strcpy(name.sun_path, SOCKET_PATH);
strncpy(name.sun_path, SOCKET_PATH, sizeof(name.sun_path) - 1);
name.sun_path[sizeof(name.sun_path) - 1] = '\0';

// if an old copy of the named socket is still around, remove it
//
Expand Down
4 changes: 3 additions & 1 deletion src/json_file_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ enum status json_file_rotate(struct json_file *jf) {

char time_str[128];
struct timeval now;
struct tm timeinfo;
gettimeofday(&now, NULL);
strftime(time_str, sizeof(time_str) - 1, "%Y%m%d%H%M%S", localtime(&now.tv_sec));
localtime_r(&now.tv_sec, &timeinfo);
strftime(time_str, sizeof(time_str) - 1, "%Y%m%d%H%M%S", &timeinfo);
status = filename_append(outfile, outfile, "-", time_str);
if (status) {
return status;
Expand Down
6 changes: 4 additions & 2 deletions src/libmerc/addr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,15 @@ void subnet_data::process_final() {
if (tmp != NULL) {
prefix = tmp;
} else {
return; // TODO: leak check
free(prefix);
return; // cleaned up
}

// allocate a buffer for the IP stats
lct_ip_stats_t *stats = (lct_ip_stats_t *) calloc(num, sizeof(lct_ip_stats_t));
if (!stats) {
return; // TODO: leak check
free(prefix);
return; // cleaned up
}

// count which subnets are prefixes of other subnets
Expand Down
Loading